diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 000000000..2b72b3bd9 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,15 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +
+build --cxxopt='-std=c++17' diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..d10cc0d08 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,74 @@ +# How to Contribute to TCMalloc
+
We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
NOTE: If you are new to GitHub, please start by reading the [Pull Request
howto](https://help.github.com/articles/about-pull-requests/).
+
## Contributor License Agreement
+
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
+
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
+
## Guidelines for Pull Requests
+
* All submissions, including submissions by project members, require review.
  We use GitHub pull requests for this purpose. Consult
  [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
  information on using pull requests.
+
* If you are a Googler, it is preferable to first create an internal CL and
  have it reviewed and submitted. The code propagation process will deliver
  the change to GitHub.
+
* Create **small PRs** that are narrowly focused on **addressing a single concern**.
  When PRs try to fix several things at a time, if only one fix is considered
  acceptable, nothing gets merged and both the author's and the reviewer's time is wasted.
  Create more PRs to address different concerns and everyone will be happy.
+
* Provide a good **PR description** as a record of **what** change is being
  made and **why** it was made. Link to a GitHub issue if it exists.
+
* Don't fix code style and formatting unless you are already changing that line
  to address an issue. Formatting of modified lines may be done using
  `git clang-format`. PRs with irrelevant changes won't be merged. If you do
  want to fix formatting or style, do that in a separate PR.
+
* Unless your PR is trivial, you should expect there will be reviewer comments
  that you'll need to address before merging. We expect you to be reasonably
  responsive to those comments; otherwise the PR will be closed after 2-3 weeks
  of inactivity.
+
* Maintain a **clean commit history** and use **meaningful commit messages**.
  PRs with a messy commit history are difficult to review and won't be merged.
  Use `rebase -i upstream/master` to curate your commit history and/or to
  bring in the latest changes from master (but avoid rebasing in the middle of a
  code review).
+ +* Keep your PR up to date with upstream/master (if there are merge conflicts, + we can't really merge your change). + +* **All tests need to be passing** before your change can be merged. We + recommend you **run tests locally** (see below) + +* Exceptions to the rules can be made if there's a compelling reason for doing + so. That is - the rules are here to serve us, not the other way around, and + the rules need to be serving their intended purpose to be valuable. + +## TCMalloc Committers + +The current members of the TCMalloc engineering team are the only committers at +present. + +## Community Guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..62589edd1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md new file mode 100644 index 000000000..a8b0467f2 --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +# TCMalloc
+
This repository contains the TCMalloc C++ code.
+
TCMalloc is Google's customized implementation of C's `malloc()` and C++'s
`operator new` used for memory allocation within our C and C++ code. TCMalloc is
a fast, multi-threaded malloc implementation.
+
## Building TCMalloc
+
[Bazel](https://bazel.build) is the official build system for TCMalloc.
+
The [TCMalloc Platforms Guide](docs/platforms) contains information on platform
support for TCMalloc.
+
## Documentation
+
All users of TCMalloc should consult the following documentation resources:
+
* The [TCMalloc Overview](docs/overview) covers the basic architecture of
  TCMalloc, and how that may affect configuration choices.
* The [TCMalloc Reference](docs/reference) covers the C and C++ TCMalloc API
  endpoints.
+
More advanced users of TCMalloc may find the following documentation useful:
+
* The [TCMalloc Tuning Guide](docs/tuning) covers the configuration choices in
  more depth, and also illustrates other ways to customize TCMalloc.
* The [TCMalloc Design Doc](docs/design) covers how TCMalloc works underneath
  the hood, and why certain design choices were made. Most developers will not
  need this level of implementation detail.
+
## License
+
The TCMalloc library is licensed under the terms of the Apache
license. See LICENSE for more information.
+
Disclaimer: This is not an officially supported Google product. diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 000000000..3044365d4 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,51 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +workspace(name = "com_google_tcmalloc") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +# Abseil +http_archive( + name = "com_google_absl", + urls = ["https://github.com/abseil/abseil-cpp/archive/564001ae506a17c51fa1223684a78f05f91d3d91.zip"], + strip_prefix = "abseil-cpp-564001ae506a17c51fa1223684a78f05f91d3d91", + sha256 = "766ac184540dd24afc1542c30b8739e1490327e80738b5241bffb70b1005405c", +) + +# GoogleTest/GoogleMock framework. Used by most unit-tests. +http_archive( + name = "com_google_googletest", + urls = ["https://github.com/google/googletest/archive/d854bd6acc47f7f6e168007d58b5f509e4981b36.zip"], + strip_prefix = "googletest-d854bd6acc47f7f6e168007d58b5f509e4981b36", + sha256 = "5a3de3cb2141335255a850cc82be488aabefebca7d16abe15381bd93b6c48f9b", +) + +# Google benchmark. +http_archive( + name = "com_github_google_benchmark", + urls = ["https://github.com/google/benchmark/archive/16703ff83c1ae6d53e5155df3bb3ab0bc96083be.zip"], + strip_prefix = "benchmark-16703ff83c1ae6d53e5155df3bb3ab0bc96083be", + sha256 = "59f918c8ccd4d74b6ac43484467b500f1d64b40cc1010daa055375b322a43ba3", +) + +# C++ rules for Bazel. +http_archive( + name = "rules_cc", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_cc/archive/7e650b11fe6d49f70f2ca7a1c4cb8bcc4a1fe239.zip", + "https://github.com/bazelbuild/rules_cc/archive/7e650b11fe6d49f70f2ca7a1c4cb8bcc4a1fe239.zip", + ], + strip_prefix = "rules_cc-7e650b11fe6d49f70f2ca7a1c4cb8bcc4a1fe239", + sha256 = "682a0ce1ccdac678d07df56a5f8cf0880fd7d9e08302b8f677b92db22e72052e", +) diff --git a/ci/linux_clang-latest_libcxx_bazel.sh b/ci/linux_clang-latest_libcxx_bazel.sh new file mode 100755 index 000000000..e6ae1dfa5 --- /dev/null +++ b/ci/linux_clang-latest_libcxx_bazel.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# +# Copyright 2019 The TCMalloc Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script that can be invoked to test tcmalloc in a hermetic environment +# using a Docker image on Linux. You must have Docker installed to use this +# script. 
+ +set -euox pipefail + +if [ -z ${TCMALLOC_ROOT:-} ]; then + TCMALLOC_ROOT="$(realpath $(dirname ${0})/..)" +fi + +if [ -z ${STD:-} ]; then + STD="c++17" +fi + +if [ -z ${COMPILATION_MODE:-} ]; then + COMPILATION_MODE="fastbuild opt" +fi + +if [ -z ${EXCEPTIONS_MODE:-} ]; then + EXCEPTIONS_MODE="-fno-exceptions -fexceptions" +fi + +readonly DOCKER_CONTAINER="gcr.io/google.com/absl-177019/linux_clang-latest:20191018" + +for std in ${STD}; do + for compilation_mode in ${COMPILATION_MODE}; do + for exceptions_mode in ${EXCEPTIONS_MODE}; do + echo "--------------------------------------------------------------------" + time docker run \ + --volume="${TCMALLOC_ROOT}:/tcmalloc:ro" \ + --workdir=/tcmalloc \ + --cap-add=SYS_PTRACE \ + --rm \ + -e CC="/opt/llvm/clang/bin/clang" \ + -e BAZEL_COMPILER="llvm" \ + -e BAZEL_CXXOPTS="-std=${std}:-nostdinc++" \ + -e BAZEL_LINKOPTS="-L/opt/llvm/libcxx/lib:-lc++:-lc++abi:-lm:-Wl,-rpath=/opt/llvm/libcxx/lib" \ + -e CPLUS_INCLUDE_PATH="/opt/llvm/libcxx/include/c++/v1" \ + ${DOCKER_EXTRA_ARGS:-} \ + ${DOCKER_CONTAINER} \ + /usr/local/bin/bazel test ... \ + --compilation_mode="${compilation_mode}" \ + --copt="${exceptions_mode}" \ + --copt=-Werror \ + --define="absl=1" \ + --keep_going \ + --show_timestamps \ + --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1" \ + --test_output=errors \ + --test_tag_filters=-benchmark \ + ${BAZEL_EXTRA_ARGS:-} + done + done +done diff --git a/ci/linux_clang-latest_libstdcxx_bazel.sh b/ci/linux_clang-latest_libstdcxx_bazel.sh new file mode 100755 index 000000000..b89df9409 --- /dev/null +++ b/ci/linux_clang-latest_libstdcxx_bazel.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# +# Copyright 2019 The Abseil Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script that can be invoked to test abseil-cpp in a hermetic environment +# using a Docker image on Linux. You must have Docker installed to use this +# script. + +set -euox pipefail + +if [ -z ${ABSEIL_ROOT:-} ]; then + ABSEIL_ROOT="$(realpath $(dirname ${0})/..)" +fi + +if [ -z ${STD:-} ]; then + STD="c++17" +fi + +if [ -z ${COMPILATION_MODE:-} ]; then + COMPILATION_MODE="fastbuild opt" +fi + +if [ -z ${EXCEPTIONS_MODE:-} ]; then + EXCEPTIONS_MODE="-fno-exceptions -fexceptions" +fi + +readonly DOCKER_CONTAINER="gcr.io/google.com/absl-177019/linux_clang-latest:20191018" + +# USE_BAZEL_CACHE=1 only works on Kokoro. +# Without access to the credentials this won't work. +if [ ${USE_BAZEL_CACHE:-0} -ne 0 ]; then + DOCKER_EXTRA_ARGS="--volume=${KOKORO_KEYSTORE_DIR}:/keystore:ro ${DOCKER_EXTRA_ARGS:-}" + # Bazel doesn't track changes to tools outside of the workspace + # (e.g. /usr/bin/gcc), so by appending the docker container to the + # remote_http_cache url, we make changes to the container part of + # the cache key. Hashing the key is to make it shorter and url-safe. 
+ container_key=$(echo ${DOCKER_CONTAINER} | sha256sum | head -c 16) + BAZEL_EXTRA_ARGS="--remote_http_cache=https://storage.googleapis.com/absl-bazel-remote-cache/${container_key} --google_credentials=/keystore/73103_absl-bazel-remote-cache ${BAZEL_EXTRA_ARGS:-}" +fi + +for std in ${STD}; do + for compilation_mode in ${COMPILATION_MODE}; do + for exceptions_mode in ${EXCEPTIONS_MODE}; do + echo "--------------------------------------------------------------------" + time docker run \ + --volume="${ABSEIL_ROOT}:/abseil-cpp:ro" \ + --workdir=/abseil-cpp \ + --cap-add=SYS_PTRACE \ + --rm \ + -e CC="/opt/llvm/clang/bin/clang" \ + -e BAZEL_COMPILER="llvm" \ + -e BAZEL_CXXOPTS="-std=${std}" \ + -e CPLUS_INCLUDE_PATH="/usr/include/c++/6" \ + ${DOCKER_EXTRA_ARGS:-} \ + ${DOCKER_CONTAINER} \ + /usr/local/bin/bazel test ... \ + --compilation_mode="${compilation_mode}" \ + --copt="${exceptions_mode}" \ + --copt=-Werror \ + --define="absl=1" \ + --keep_going \ + --show_timestamps \ + --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1" \ + --test_env="TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo" \ + --test_output=errors \ + --test_tag_filters=-benchmark \ + ${BAZEL_EXTRA_ARGS:-} + done + done +done diff --git a/ci/linux_gcc-latest_libstdcxx_bazel.sh b/ci/linux_gcc-latest_libstdcxx_bazel.sh new file mode 100755 index 000000000..27b709716 --- /dev/null +++ b/ci/linux_gcc-latest_libstdcxx_bazel.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# +# Copyright 2019 The TCMalloc Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script that can be invoked to test tcmalloc in a hermetic environment +# using a Docker image on Linux. You must have Docker installed to use this +# script. + +set -euox pipefail + +if [ -z ${TCMALLOC_ROOT:-} ]; then + TCMALLOC_ROOT="$(realpath $(dirname ${0})/..)" +fi + +if [ -z ${STD:-} ]; then + STD="c++17" +fi + +if [ -z ${COMPILATION_MODE:-} ]; then + COMPILATION_MODE="fastbuild opt" +fi + +if [ -z ${EXCEPTIONS_MODE:-} ]; then + EXCEPTIONS_MODE="-fno-exceptions -fexceptions" +fi + +readonly DOCKER_CONTAINER="gcr.io/google.com/absl-177019/linux_gcc-latest:20200106" + +for std in ${STD}; do + for compilation_mode in ${COMPILATION_MODE}; do + for exceptions_mode in ${EXCEPTIONS_MODE}; do + echo "--------------------------------------------------------------------" + time docker run \ + --volume="${TCMALLOC_ROOT}:/tcmalloc:ro" \ + --workdir=/tcmalloc \ + --cap-add=SYS_PTRACE \ + --rm \ + -e CC="/usr/local/bin/gcc" \ + -e BAZEL_CXXOPTS="-std=${std}" \ + ${DOCKER_EXTRA_ARGS:-} \ + ${DOCKER_CONTAINER} \ + /usr/local/bin/bazel test ... 
\
          --compilation_mode="${compilation_mode}" \
          --copt="${exceptions_mode}" \
          --copt=-Werror \
          --define="absl=1" \
          --keep_going \
          --show_timestamps \
          --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1" \
          --test_output=errors \
          --test_tag_filters=-benchmark \
          ${BAZEL_EXTRA_ARGS:-}
    done
  done
done diff --git a/docs/design.md b/docs/design.md new file mode 100644 index 000000000..c49e6ed7c --- /dev/null +++ b/docs/design.md @@ -0,0 +1,472 @@ +# TCMalloc : Thread-Caching Malloc
+
+
+
## Motivation
+
TCMalloc is a memory allocator designed as an alternative to the system default
allocator, with the following characteristics:
+
* Fast, uncontended allocation and deallocation for most objects. Objects are
  cached, depending on mode, either per-thread or per-logical-CPU. Most
  allocations do not need to take locks, so there is low contention and good
  scaling for multi-threaded applications.
* Flexible use of memory, so freed memory can be reused for different object
  sizes, or returned to the OS.
* Low per-object memory overhead by allocating "pages" of objects of the same
  size, leading to a space-efficient representation of small objects.
* Low-overhead sampling, enabling detailed insight into an application's memory
  usage.
+
## Usage
+
You use TCMalloc by specifying it as the `malloc` attribute on your binary rules
in Bazel.
+
## Overview
+
The following block diagram shows the rough internal structure of TCMalloc:
+
![Diagram of TCMalloc internal structure](images/tcmalloc_internals.png "TCMalloc internal structure"){.center}
+
We can break TCMalloc into three components: the front-end, middle-end, and
back-end. We will discuss these in more detail in the following sections. A
rough breakdown of responsibilities is:
+
* The front-end is a cache that provides fast allocation and deallocation of
  memory to the application.
* The middle-end is responsible for refilling the front-end cache.
* The back-end handles fetching memory from the OS.
+
Note that the front-end can be run in either per-CPU or legacy per-thread mode,
and the back-end can support either the hugepage aware pageheap or the legacy
pageheap.
+
## The TCMalloc Front-end
+
The front-end handles a request for memory of a particular size. The front-end
has a cache of memory that it can use for allocation or to hold free memory.
This cache is only accessible by a single thread at a time, so it does not
require any locks; hence most allocations and deallocations are fast.
+
The front-end will satisfy any request if it has cached memory of the
appropriate size. If the cache for that particular size is empty, the front-end
will request a batch of memory from the middle-end to refill the cache. The
middle-end comprises the CentralFreeList and the TransferCache.
+
If the middle-end is exhausted, or if the requested size is greater than the
maximum size that the front-end caches, a request will go to the back-end to
either satisfy the large allocation, or to refill the caches in the middle-end.
The back-end is also referred to as the PageHeap.
+
There are two implementations of the TCMalloc front-end:
+
* Originally it supported per-thread caches of objects (hence the name Thread
  Caching Malloc). However, this resulted in memory footprints that scaled
  with the number of threads. Modern applications can have large thread
  counts, which result in either large amounts of aggregate per-thread memory,
  or many threads having minuscule per-thread caches.
+* More recently, TCMalloc has supported per-CPU mode. In this mode each logical
  CPU in the system has its own cache from which to allocate memory. Note: On
  x86 a logical CPU is equivalent to a hyperthread.
+
The differences between per-thread and per-CPU modes are entirely confined to
the implementations of malloc/new and free/delete.
+
## Small and Large Object Allocation
+
Allocations of "small" objects are mapped onto one of
[60-80 allocatable size-classes](https://github.com/google/tcmalloc/blob/master/tcmalloc/size_classes.cc).
For example, an allocation of 12 bytes will get rounded up to the 16-byte
size-class. The size-classes are designed to minimize the amount of memory that
is wasted when rounding to the next largest size class.
+
When compiled with `__STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8`, we use a set of
sizes aligned to 8 bytes for raw storage allocated with `::operator new`. This
smaller alignment minimizes wasted memory for many common allocation sizes (24,
40, etc.) which are otherwise rounded up to a multiple of 16 bytes. On many
compilers, this behavior is controlled by the `-fnew-alignment=...` flag.
When `__STDCPP_DEFAULT_NEW_ALIGNMENT__` is not
specified (or is larger than 8 bytes), we use standard 16 byte alignments for
`::operator new`. However, for allocations under 16 bytes, we may return an
object with a lower alignment, as no object with a larger alignment requirement
can be allocated in the space.
+
When an object of a given size is requested, that request is
[mapped to a request of a particular class-size](https://github.com/google/tcmalloc/blob/master/tcmalloc/common.h),
and the returned memory is from that size-class. This means that the returned
memory is at least as large as the requested size. These class-sized allocations
are handled by the front-end.
+
Objects of size greater than the limit defined by
[`kMaxSize`](https://github.com/google/tcmalloc/blob/master/tcmalloc/common.h)
are allocated directly from the [backend](#pageheap). As such, they are not
cached in either the front or middle ends. Allocation requests for large object
sizes are rounded up to the [TCMalloc page size](#pagesizes).
+
## Deallocation
+
When an object is deallocated, the compiler will provide the size of the object
if it is known at compile time. If the size is not known, it will be looked up
in the [pagemap](#pagemap). If the object is small, it will be put back into the
front-end cache. If the object is larger than kMaxSize, it is returned directly
to the pageheap.
+
### Per-CPU Mode
+
In per-CPU mode, a single large block of memory is allocated. The following
diagram shows how this slab of memory is divided between CPUs and how each CPU
uses a part of the slab to hold metadata as well as pointers to available
objects.
+
![Memory layout of per-cpu data structures](images/per-cpu-cache-internals.png "Memory layout of per-cpu data structures"){.center}
+
Each logical CPU is assigned a section of this memory to hold metadata and
pointers to available objects of particular size-classes. The metadata comprises
one *header* block per size-class. The header has a pointer to the start of the
per-size-class array of pointers to objects, as well as a pointer to the
current, dynamic maximum capacity and the current position within that array
segment.
The static maximum capacity of each per-size-class array of pointers is
[determined at start time](https://github.com/google/tcmalloc/blob/master/tcmalloc/percpu_tcmalloc.h)
by the difference between the start of the array for this size-class and the
start of the array for the next size-class.
+
At runtime the maximum number of items of a particular class-size that can be
stored in the per-cpu block will vary, but it can never exceed the statically
determined maximum capacity assigned at start up.
+
When an object of a particular class-size is requested, it is removed from this
array; when the object is freed, it is added to the array. If the array is
[exhausted](https://github.com/google/tcmalloc/blob/master/tcmalloc/cpu_cache.h)
the array is refilled using a batch of objects from the middle-end. If the array
would
[overflow](https://github.com/google/tcmalloc/blob/master/tcmalloc/cpu_cache.h),
a batch of objects is removed from the array and returned to the middle-end.
+
The amount of memory that can be cached is limited per-cpu by the parameter
`MallocExtension::SetMaxPerCpuCacheSize`. This means that the total amount of
cached memory depends on the number of active per-cpu caches. Consequently,
machines with higher CPU counts can cache more memory.
+
To avoid holding memory on CPUs where the application no longer runs,
`MallocExtension::ReleaseCpuMemory` frees objects held in a specified CPU's
caches.
+
Within a CPU, the distribution of memory is managed across all the size classes
so as to keep the maximum amount of cached memory below the limit. Notice that
it is managing the maximum amount that can be cached, and not the amount that is
currently cached. On average the amount actually cached should be about half the
limit.
+
The maximum capacity is increased when a class-size
[runs out of objects](https://github.com/google/tcmalloc/blob/master/tcmalloc/cpu_cache.cc):
as well as fetching more objects, it considers
[increasing the capacity](https://github.com/google/tcmalloc/blob/master/tcmalloc/cpu_cache.cc)
of the class-size. It can increase the capacity of the size class up until the
total memory (for all class sizes) that the cache could hold reaches the per-cpu
limit or until the capacity of that size class reaches the hard-coded size limit
for that size-class. If the size-class has not reached the hard-coded limit,
then in order to increase the capacity it can
[steal](https://github.com/google/tcmalloc/blob/master/tcmalloc/cpu_cache.cc)
capacity from another size class on the same CPU.
+
### Restartable Sequences and Per-CPU TCMalloc
+
To work correctly, per-CPU mode relies on restartable sequences (man rseq(2)). A
restartable sequence is just a block of (assembly language) instructions,
largely like a typical function. A restriction of restartable sequences is that
they cannot write partial state to memory; the final instruction must be a
single write of the updated state. The idea of restartable sequences is that if
a thread is removed from a CPU (e.g. context switched) while it is executing a
restartable sequence, the sequence will be restarted from the top. Hence the
sequence will either complete without interruption, or be repeatedly restarted
until it completes without interruption. This is achieved without using any
locking or atomic instructions, thereby avoiding any contention in the sequence
itself.
The practical implication of this for TCMalloc is that the code can use a
restartable sequence like
[TcmallocSlab_Push](https://github.com/google/tcmalloc/blob/master/tcmalloc/percpu_rseq_x86_64.S)
to fetch from or return an element to a per-CPU array without needing locking.
The restartable sequence ensures that either the array is updated without the
thread being interrupted, or the sequence is restarted if the thread was
interrupted (for example, by a context switch that enables a different thread to
run on that CPU).
+
### Legacy Per-Thread Mode
+
In per-thread mode, TCMalloc assigns each thread a thread-local cache. Small
allocations are satisfied from this thread-local cache. Objects are moved
between the middle-end and the thread-local cache as needed.
+
A thread cache contains one singly linked list of free objects per size-class
(so if there are N class-sizes, there will be N corresponding linked lists), as
shown in the following diagram.
+
![Structure of per-thread cache](images/per-thread-structure.png "Structure of per-thread cache"){.center}
+
On allocation an object is removed from the appropriate size-class of the
per-thread caches. On deallocation, the object is prepended to the appropriate
size-class. Underflow and overflow are handled by accessing the middle-end to
either fetch more objects, or to return some objects.
+
The maximum capacity of the per-thread caches is set by the parameter
`MallocExtension::SetMaxTotalThreadCacheBytes`.
However, it is possible for the
total size to exceed that limit, as each per-thread cache has a minimum size
[kMinThreadCacheSize](https://github.com/google/tcmalloc/blob/master/tcmalloc/common.h)
which is usually 512KiB. In the event that a thread wishes to increase its
capacity, it needs to
[scavenge](https://github.com/google/tcmalloc/blob/master/tcmalloc/thread_cache.cc)
capacity from other threads.
+
When threads exit, their cached memory is
[returned](https://github.com/google/tcmalloc/blob/master/tcmalloc/thread_cache.cc)
to the middle-end.
+
### Runtime Sizing of Front-end Caches
+
It is important for the size of the front-end cache free lists to adjust
optimally. If the free list is too small, we'll need to go to the central free
list too often. If the free list is too big, we'll waste memory as objects sit
idle in there.
+
Note that the caches are just as important for deallocation as they are for
allocation. Without a cache, each deallocation would require moving the memory
to the central free list.
+
Per-CPU and per-thread modes have different implementations of a dynamic cache
sizing algorithm.
+
* In per-thread mode the maximum number of objects that can be stored is
  [increased](https://github.com/google/tcmalloc/blob/master/tcmalloc/thread_cache.cc)
  up to a limit whenever more objects need to be fetched from the middle-end.
  Similarly the capacity is
  [decreased](https://github.com/google/tcmalloc/blob/master/tcmalloc/thread_cache.cc)
  when we find that we have cached too many objects. The size of the cache is
  also
  [reduced](https://github.com/google/tcmalloc/blob/master/tcmalloc/thread_cache.cc)
  should the total size of the cached objects exceed the per-thread limit.
* In per-CPU mode the
  [capacity](https://github.com/google/tcmalloc/blob/master/tcmalloc/cpu_cache.cc)
  of the free list is increased based on whether we are alternating between
  underflows and overflows (indicating that a larger cache might stop this
  alternation).
The capacity is
  [reduced](https://github.com/google/tcmalloc/blob/master/tcmalloc/cpu_cache.cc)
  when it has not been grown for a time and may therefore be over capacity.
+
## TCMalloc Middle-end
+
The middle-end is responsible for providing memory to the front-end and
returning memory to the back-end. The middle-end comprises the Transfer cache
and the Central free list. Although these are often referred to as singular,
there is one transfer cache and one central free list per class-size. These
caches are each protected by a mutex lock, so there is a serialization cost to
accessing them.
+
### Transfer Cache
+
When the front-end requests memory, or returns memory, it will reach out to the
transfer cache.
+
The transfer cache holds an array of pointers to free memory, and it is quick to
move objects into this array, or fetch objects from this array on behalf of the
front-end.
+
The transfer cache gets its name from situations where one thread is allocating
memory that is deallocated by another thread. The transfer cache allows memory
to rapidly flow between two different threads.
+
If the transfer cache is unable to satisfy the memory request, or has
insufficient space to hold the returned objects, it will access the central free
list.
+
### Central Free List
+
The central free list manages memory in "[spans](#spans)"; a span is a
collection of one or more "[TCMalloc pages](#pagesizes)" of memory. These terms
will be explained in the next couple of sections.
+
A request for one or more objects is satisfied by the central free list by
[extracting](https://github.com/google/tcmalloc/blob/master/tcmalloc/central_freelist.cc)
objects from spans until the request is satisfied. If there are insufficient
available objects in the spans, more spans are requested from the back-end.
+
When objects are
[returned to the central free list](https://github.com/google/tcmalloc/blob/master/tcmalloc/central_freelist.cc),
each object is mapped to the span to which it belongs (using the
[pagemap](#pagemap)) and then released into that span. If all the objects that
reside in a particular span are returned to it, the entire span gets returned to
the back-end.
+
### Pagemap and Spans
+
The heap managed by TCMalloc is divided into [pages](#pagesizes) of a
compile-time determined size. A run of contiguous pages is represented by a
`Span` object. A span can be used to manage a large object that has been handed
off to the application, or a run of pages that have been split up into a
sequence of small objects. If the span manages small objects, the size-class of
the objects is recorded in the span.
+
The pagemap is used to look up the span to which an object belongs, or to
identify the class-size for a given object.
+
TCMalloc uses a 2-level or 3-level
[radix tree](https://github.com/google/tcmalloc/blob/master/tcmalloc/pagemap.h)
in order to map all possible memory locations onto spans.
+
The following diagram shows how a radix-2 pagemap is used to map the address of
objects onto the spans that control the pages where the objects reside. In the
diagram **span A** covers two pages, and **span B** covers three pages.
+
![The pagemap maps objects to spans.](images/pagemap.png "The pagemap maps objects to spans."){.center}
+
Spans are used in the middle-end to determine where to place returned objects,
and in the back-end to manage the handling of page ranges.
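To make the pagemap idea concrete, here is a minimal two-level radix-tree lookup written in C++.
It is only an illustrative sketch: the class, field names, and constants (page shift, level widths,
48-bit address assumption) are invented for this example and do not match TCMalloc's actual
templated `PageMap` in `tcmalloc/pagemap.h`, which also allocates its nodes from TCMalloc's own
metadata allocator rather than `operator new`.

```cpp
#include <cstddef>
#include <cstdint>
#include <new>

struct Span;  // Manages a run of contiguous TCMalloc pages (see above).

// Toy two-level radix tree mapping page numbers to the Span* that owns each
// page.  Invented constants: 8 KiB TCMalloc pages and addresses below 2^48,
// so page numbers fit in 35 bits, split into an 18-bit root index and a
// 17-bit leaf index.
class ToyPageMap {
 public:
  // Returns the span covering `addr`, or nullptr if the page is unknown.
  Span* GetSpan(uintptr_t addr) const {
    const uintptr_t page = addr >> kPageShift;
    const Leaf* leaf = root_[page >> kLeafBits];
    return leaf == nullptr ? nullptr : leaf->span[page & (kLeafEntries - 1)];
  }

  // Records that pages [first_page, first_page + n) belong to `span`.
  // Returns false if a leaf node cannot be allocated.
  bool SetRange(uintptr_t first_page, size_t n, Span* span) {
    for (size_t i = 0; i < n; ++i) {
      const uintptr_t page = first_page + i;
      Leaf*& leaf = root_[page >> kLeafBits];
      if (leaf == nullptr) {
        leaf = new (std::nothrow) Leaf();  // Real TCMalloc uses its metadata arena here.
        if (leaf == nullptr) return false;
      }
      leaf->span[page & (kLeafEntries - 1)] = span;
    }
    return true;
  }

 private:
  static constexpr int kPageShift = 13;  // 8 KiB pages (illustrative).
  static constexpr int kLeafBits = 17;
  static constexpr size_t kLeafEntries = size_t{1} << kLeafBits;
  static constexpr size_t kRootEntries = size_t{1} << 18;

  struct Leaf {
    Span* span[kLeafEntries] = {};  // nullptr means "page not managed".
  };

  Leaf* root_[kRootEntries] = {};  // Second level is populated lazily.
};
```

The key property the sketch shares with the real pagemap is that a lookup is just two array
indexes derived from the page number, and leaf nodes are only allocated for address ranges the
heap actually uses.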
+
+### Storing Small Objects in Spans
+
A span contains a pointer to the base of the TCMalloc pages that the span
controls. For small objects those pages are divided into at most 2^16
objects. This value is selected so that within the span we can refer to objects
by a two-byte index.
+
This means that we can use an
[unrolled linked list](https://en.wikipedia.org/wiki/Unrolled_linked_list) to
hold the objects. For example, if we have eight-byte objects we can store the
indexes of three ready-to-use objects, and use the fourth slot to store the index
of the next object in the chain. This data structure reduces cache misses over a
fully linked list.
+
The other advantage of using two-byte indexes is that we're able to use spare
capacity in the span itself to
[cache four objects](https://github.com/google/tcmalloc/blob/master/tcmalloc/span.h).
+
When we have
[no available objects](https://github.com/google/tcmalloc/blob/master/tcmalloc/central_freelist.cc)
for a class-size, we need to fetch a new span from the pageheap and
[populate](https://github.com/google/tcmalloc/blob/master/tcmalloc/central_freelist.cc)
it.
+
## TCMalloc Page Sizes {#pagesizes}
+
TCMalloc can be built with various
["page sizes"](https://github.com/google/tcmalloc/blob/master/tcmalloc/common.h).
Note that these do not correspond to the page size used in the TLB of the
underlying hardware. These TCMalloc page sizes are currently 4KiB, 8KiB, 32KiB,
and 256KiB.
+
A TCMalloc page either holds multiple objects of a particular size, or is used
as part of a group to hold an object of size greater than a single page. If an
entire page becomes free, it will be returned to the back-end (the pageheap) and
can later be repurposed to hold objects of a different size (or returned to the
OS).
+
Small pages are better able to handle the memory requirements of the application
with less overhead. For example, a half-used 4KiB page will have 2KiB left over
versus a 32KiB page, which would have 16KiB. Small pages are also more likely to
become free. For example, a 4KiB page can hold eight 512-byte objects versus 64
objects on a 32KiB page; and there is much less chance of 64 objects being free
at the same time than there is of eight becoming free.
+
Large pages result in less need to fetch and return memory from the back-end. A
single 32KiB page can hold eight times the objects of a 4KiB page, and this can
result in the costs of managing the larger pages being smaller. It also takes
fewer large pages to map the entire virtual address space. TCMalloc has a
[pagemap](https://github.com/google/tcmalloc/blob/master/tcmalloc/pagemap.h)
which maps a virtual address onto the structures that manage the objects in that
address range. Larger pages mean that the pagemap needs fewer entries and is
therefore smaller.
+
Consequently, it makes sense for applications with small memory footprints, or
that are sensitive to memory footprint size, to use smaller TCMalloc page sizes.
Applications with large memory footprints are likely to benefit from larger
TCMalloc page sizes.
+
## TCMalloc Back-end {#pageheap}
+
The back-end of TCMalloc has three jobs:
+
* It manages large chunks of unused memory.
* It is responsible for fetching memory from the OS when there is no suitably
  sized memory available to fulfill an allocation request.
* It is responsible for returning unneeded memory back to the OS (see the
  sketch after this list).
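As a small illustration of the third job, an application can also ask TCMalloc to return free
memory to the OS eagerly through `MallocExtension`. This is a hedged sketch: it assumes the
`tcmalloc::MallocExtension::ReleaseMemoryToSystem()` entry point declared in
`tcmalloc/malloc_extension.h`; check that header for the exact signature before relying on it.

```cpp
#include "tcmalloc/malloc_extension.h"

// Ask the back-end to hand up to 16 MiB of unused memory back to the OS.
// Assumption: ReleaseMemoryToSystem(size_t) as declared in
// tcmalloc/malloc_extension.h; the amount actually released may be smaller
// (or zero) depending on how much free memory the back-end currently holds.
void ReleaseSomeMemoryToOs() {
  tcmalloc::MallocExtension::ReleaseMemoryToSystem(16 * 1024 * 1024);
}
```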
+
+There are two back-ends for TCMalloc:
+
* The legacy pageheap, which manages memory in TCMalloc-page-sized chunks.
* The hugepage aware pageheap, which manages memory in chunks of hugepage
  sizes. Managing memory in hugepage chunks enables the allocator to improve
  application performance by reducing TLB misses.
+
### Legacy Pageheap
+
The legacy pageheap is an array of free lists for particular lengths of
contiguous pages of available memory. For `k < 256`, the `k`th entry is a free
list of runs that consist of `k` TCMalloc pages. The `256`th entry is a free
list of runs that have length `>= 256` pages:
+
![Layout of legacy pageheap.](images/legacy_pageheap.png "Layout of legacy pageheap."){.center}
+
An allocation for `k` pages is satisfied by looking in the `k`th free list. If
that free list is empty, we look in the next free list, and so forth.
Eventually, we look in the last free list if necessary. If that fails, we fetch
memory from the system using `mmap`.
+
If an allocation for `k` pages is satisfied by a run of pages of length `> k`,
the remainder of the run is re-inserted into the appropriate free list in
the pageheap.
+
When a range of pages is returned to the pageheap, the adjacent pages are
checked to determine if they now form a contiguous region. If that is the case,
the pages are concatenated and placed into the appropriate free list.
+
### Hugepage Aware Pageheap
+
The objective of the hugepage aware allocator is to hold memory in hugepage-sized
chunks. On x86 a hugepage is 2MiB in size. To do this, the back-end has three
different caches:
+
* The filler cache holds hugepages which have had some memory allocated from
  them. This can be considered to be similar to the legacy pageheap in that it
  holds linked lists of memory of a particular number of TCMalloc pages.
  Allocation requests for sizes of less than a hugepage are
  (typically) returned from the filler cache. If the filler cache does not
  have sufficient available memory, it will request additional hugepages from
  which to allocate.
* The region cache, which handles allocations greater than a hugepage. This
  cache allows allocations to straddle multiple hugepages, and packs multiple
  such allocations into a contiguous region. This is particularly useful for
  allocations that slightly exceed the size of a hugepage (for example, 2.1
  MiB).
* The hugepage cache handles large allocations of at least a hugepage. There
  is overlap in usage with the region cache, but the region cache is only
  enabled when it is determined (at runtime) that the allocation pattern would
  benefit from it.
+
## Caveats {#caveats}
+
TCMalloc will reserve some memory for metadata at start up. The amount of
metadata will grow as the heap grows. In particular, the pagemap will grow with
the virtual address range that TCMalloc uses, and the spans will grow as the
number of active pages of memory grows. In per-CPU mode, TCMalloc will reserve a
slab of memory per-CPU (typically 256 KiB), which, on systems with large numbers
of logical CPUs, can lead to a multi-megabyte footprint.
+
It is worth noting that TCMalloc requests memory from the OS in large chunks
(typically 1 GiB regions). The address space is reserved, but not backed by
physical memory until it is used. Because of this approach, the VSS of the
application can be substantially larger than the RSS.
A side effect of this is
that trying to limit an application's memory use by restricting VSS will fail
long before the application has used that much physical memory.
+
Don't try to load TCMalloc into a running binary (e.g., using JNI in Java
programs). The binary will have allocated some objects using the system malloc,
and may try to pass them to TCMalloc for deallocation. TCMalloc will not be able
to handle such objects. diff --git a/docs/images/legacy_pageheap.png b/docs/images/legacy_pageheap.png new file mode 100644 index 000000000..f93c4dc3e Binary files /dev/null and b/docs/images/legacy_pageheap.png differ diff --git a/docs/images/pagemap.png b/docs/images/pagemap.png new file mode 100644 index 000000000..4a712c15b Binary files /dev/null and b/docs/images/pagemap.png differ diff --git a/docs/images/per-cpu-cache-internals.png b/docs/images/per-cpu-cache-internals.png new file mode 100644 index 000000000..7e10a1aef Binary files /dev/null and b/docs/images/per-cpu-cache-internals.png differ diff --git a/docs/images/per-thread-structure.png b/docs/images/per-thread-structure.png new file mode 100644 index 000000000..596289d25 Binary files /dev/null and b/docs/images/per-thread-structure.png differ diff --git a/docs/images/spanmap.gif b/docs/images/spanmap.gif new file mode 100644 index 000000000..a0627f6a7 Binary files /dev/null and b/docs/images/spanmap.gif differ diff --git a/docs/images/tcmalloc_internals.png b/docs/images/tcmalloc_internals.png new file mode 100644 index 000000000..5eb0e59f2 Binary files /dev/null and b/docs/images/tcmalloc_internals.png differ diff --git a/docs/overview.md b/docs/overview.md new file mode 100644 index 000000000..ea2dd874a --- /dev/null +++ b/docs/overview.md @@ -0,0 +1,98 @@ +# TCMalloc Overview
+
TCMalloc is Google's customized implementation of C's `malloc()` and C++'s
`operator new` used for memory allocation within our C and C++ code. This custom
memory allocation framework is an alternative to the one provided by the C
standard library (on Linux usually through `glibc`) and C++ standard library.
TCMalloc is designed to be more efficient at scale than other implementations.
+
Specifically, TCMalloc provides the following benefits:
+
* Performance scales with highly parallel applications.
* Optimizations brought about with recent C++14 and C++17 standard enhancements,
  and by diverging slightly from the standard where performance benefits
  warrant. (These are noted within the [TCMalloc Reference](reference).)
* Extensions to allow performance improvements under certain architectures, and
  additional behavior such as metric gathering.
+
## TCMalloc Cache Operation Mode
+
TCMalloc may operate in one of two fashions:
+
* (default) per-CPU caching, where TCMalloc maintains memory caches local to
  individual logical cores. Per-CPU caching is enabled when running TCMalloc on
  any Linux kernel that utilizes restartable sequences (RSEQ). Support for RSEQ
  was merged in Linux 4.18.
* per-thread caching, where TCMalloc maintains memory caches local to
  each application thread. If RSEQ is unavailable, TCMalloc reverts to using
  this legacy behavior.
+
NOTE: the "TC" in TCMalloc refers to Thread Caching, which was originally a
distinguishing feature of TCMalloc; the name remains as a legacy.
+
In both cases, these cache implementations allow TCMalloc to avoid requiring
locks for most memory allocations and deallocations.
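If you need to confirm at runtime which of the two modes is in effect (for example, on a kernel
without RSEQ support), the `MallocExtension` telemetry API can report it. The snippet below is a
sketch under an assumption: it uses a `tcmalloc::MallocExtension::PerCpuCachesActive()` accessor
from `tcmalloc/malloc_extension.h`, so verify the exact name and header in your checkout before
depending on it.

```cpp
#include <cstdio>

#include "tcmalloc/malloc_extension.h"

int main() {
  // Assumption: PerCpuCachesActive() returns true once the per-CPU (RSEQ)
  // front-end is active, and false when the legacy per-thread mode is in use.
  if (tcmalloc::MallocExtension::PerCpuCachesActive()) {
    std::printf("TCMalloc is using per-CPU caches (RSEQ available).\n");
  } else {
    std::printf("TCMalloc fell back to legacy per-thread caches.\n");
  }
  return 0;
}
```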
+
+## TCMalloc Features
+
TCMalloc provides APIs for dynamic memory allocation: `malloc()` using the C
API, and `::operator new` using the C++ API. TCMalloc, like most allocation
frameworks, manages this memory better than raw memory requests (such as through
`mmap()`) by providing several optimizations:
+
* Performing allocations from the operating system by managing
  specifically-sized chunks of memory (called "pages"). Having all of these
  chunks of memory the same size allows TCMalloc to simplify bookkeeping.
* Devoting separate pages (or runs of pages called "Spans" in TCMalloc) to
  specific object sizes. For example, all 16-byte objects are placed within
  a "Span" specifically allocated for objects of that size. Operations to get or
  release memory in such cases are much simpler.
* Holding memory in *caches* to speed up access to commonly-used objects.
  Holding such caches even after deallocation also helps avoid costly system
  calls if such memory is later re-allocated.
+
The cache size can also affect performance. The larger the cache, the less any
given cache will overflow or get exhausted, and therefore require a lock to get
more memory. TCMalloc extensions allow you to modify this cache size, though the
default behavior should be preferred in most cases. For more information,
consult the [TCMalloc Tuning Guide](tuning).
+
Additionally, TCMalloc exposes telemetry about the state of the application's
heap via `MallocExtension`. This can be used for gathering profiles of the live
heap, as well as a snapshot taken near the heap's high-water mark size (a peak
heap profile).
+
## The TCMalloc API
+
TCMalloc implements the C and C++ dynamic memory API endpoints from the C11,
C++11, C++14, and C++17 standards.
+
From C++, this includes:
+
* The basic `::operator new`, `::operator delete`, and array variant
  functions.
* C++14's sized `::operator delete`.
* C++17's overaligned `::operator new` and `::operator delete` functions.
+
Unlike in the standard implementations, TCMalloc does not throw an exception
when allocations fail, but instead crashes directly. Such behavior can be used
as a performance optimization for move constructors not currently marked
`noexcept`; such move operations can be allowed to fail directly due to
allocation failures. In [Abseil](https://abseil.io/docs/cpp/guides/base), these
are enabled with `-DABSL_ALLOCATOR_NOTHROW`.
+
From C, this includes `malloc`, `calloc`, `realloc`, and `free`.
+
The TCMalloc API obeys the behavior of C90 DR075 and
[DR445](http://www.open-std.org/jtc1/sc22/wg14/www/docs/summary.htm#dr_445)
which states:
+
    The alignment requirement still applies even if the size is too small for
    any object requiring the given alignment.
+
In other words, `malloc(1)` returns an `alignof(std::max_align_t)`-aligned pointer.
Based on the progress of
[N2293](http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2293.htm), we may relax
this alignment in the future.
+
For more complete information, consult the [TCMalloc Reference](reference). diff --git a/docs/platforms.md b/docs/platforms.md new file mode 100644 index 000000000..4457bef9a --- /dev/null +++ b/docs/platforms.md @@ -0,0 +1,52 @@ +# TCMalloc Platforms
+
The TCMalloc code is supported on the following platforms. By "platforms",
we mean the union of operating system, architecture (e.g. little-endian vs.
big-endian), compiler, and standard library.
+ +## Language Requirements + +TCMalloc requires a code base that supports C++17 and our code is +C++17-compliant. C code is required to be compliant to C11. + +We guarantee that our code will compile under the following compilation flags: + +Linux: + +* gcc, clang 5.0+: `-std=c++17` + +(TL;DR; All code at this time must be built under C++17. We will update this +list if circumstances change.) + +## Supported Platforms + +The document below lists each platform, broken down by Operating System, +Archiecture, Specific Compiler, and Standard Library implementation. + +### Linux + +**Supported** + + + + + + + + + + + + + + + + + + + + +
+Operating System | Endianness/Word Size  | Processor Architectures | Compilers*              | Standard Libraries
+---------------- | --------------------- | ----------------------- | ----------------------- | -------------------
+Linux            | little-endian, 64-bit | x86, PPC                | gcc 9.2+<br>clang 5.0+  | libstdc++<br>libc++
+ +\* We test on gcc 9.2, though gcc versions (which support C++17) prior to that +release should also work. diff --git a/docs/reference.md b/docs/reference.md new file mode 100644 index 000000000..c57d9bddb --- /dev/null +++ b/docs/reference.md @@ -0,0 +1,244 @@ +# TCMalloc Basic Reference + +TCMalloc provides implementations for C and C++ library memory management +routines (`malloc()`, etc.) provided within the C and C++ standard libraries. + +Currently, TCMalloc requires code that conforms to the C11 C standard library +and the C++11, C++14, or C++17 C++ standard library. + +NOTE: although the C API in this document is specific to the C language, the +entire TCMalloc API itself is designed to be callable directly within C++ code +(and we expect most usage to be from C++). The documentation in this section +assumes C constructs (e.g. `size_t`) though invocations using equivalent C++ +constructs of aliased types (e.g. `std::size_t`) are instrinsically supported. + +## C++ API + +We implement the variants of `operator new` and `operator delete` from the +C++11, C++14, C++17 standards exposed within the `` header file. This +includes: + +* The basic `::operator new()`, `::operator delete()`, and array variant + functions. +* C++14's sized `::operator delete()` +* C++17's overaligned `::operator new()` and `::operator delete()` functions. + As required by the C++ standard, memory allocated using an aligned `operator + new` function must be deallocated with an aligned `operator delete`. + +### `::operator new` / `::operator new[]` + +``` +void* operator new(std::size_t count); +void* operator new(std::size_t count, const std::nothrow_t& tag) noexcept; +void* operator new(std::size_t count, std::align_val_t al); // C++17 +void* operator new(std::size_t count, + std::align_val_t al, const std::nothrow_t&) noexcept; // C++17 + +void* operator new[](std::size_t count); +void* operator new[](std::size_t count, const std::nothrow_t& tag) noexcept; +void* operator new[](std::size_t count, std::align_val_t al); // C++17 +void* operator new[](std::size_t count, + std::align_val_t al, const std::nothrow_t&) noexcept; // C++17 +``` + +`operator new`/`operator new[]` allocates `count` bytes. They may be invoked +directly but are more commonly invoked as part of a *new*-expression. + +When `__STDCPP_DEFAULT_NEW_ALIGNMENT__` is not specified (or is larger than 8 +bytes), we use standard 16 byte alignments for `::operator new` without a +`std::align_val_t` argument. However, for allocations under 16 bytes, we may +return an object with a lower alignment, as no object with a larger alignment +requirement can be allocated in the space. When compiled with +`__STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8`, we use a set of sizes aligned to 8 +bytes for raw storage allocated with `::operator new`. + +NOTE: On many platforms, the value of `__STDCPP_DEFAULT_NEW_ALIGNMENT__` can be +configured by the `-fnew-alignment=...` flag. + +The `std::align_val_t` variants provide storage suitably aligned to the +requested alignment. + +If the allocation is unsuccessful, a failure terminates the program. + +NOTE: unlike in the C++ standard, we do not throw an exception in case of +allocation failure, or invoke `std::get_new_handler()` repeatedly in an +attempt to successfully allocate, but instead crash directly. Such behavior can +be used as a performance optimization for move constructors not currently marked +`noexcept`; such move operations can be allowed to fail directly due to +allocation failures. 
Within Abseil code, these direct allocation failures are +enabled with the Abseil build-time configuration macro +[`ABSL_ALLOCATOR_NOTHROW`](https://abseil.io/docs/cpp/guides/base#abseil-exception-policy). + +If the `std::no_throw_t` variant is utilized, upon failure, `::operator new` +will return `nullptr` instead. + +### `::operator delete` / `::operator delete[]` + +``` +void operator delete(void* ptr) noexcept; +void operator delete(void* ptr, std::size_t sz) noexcept; +void operator delete(void* ptr, std::align_val_t al) noexcept; +void operator delete(void* ptr, std::size_t sz, + std::align_val_t all) noexcept; + +void operator delete[](void* ptr) noexcept; +void operator delete[](void* ptr, std::size_t sz) noexcept; // C++14 +void operator delete[](void* ptr, std::align_val_t al) noexcept; // C++17 +void operator delete[](void* ptr, std::size_t sz, + std::align_val_t al) noexcept; // C++17 +``` + +`::operator delete`/`::operator delete[]` deallocate memory previously allocated +by a corresponding `::operator new`/`::operator new[]` call respectively. It is +commonly invoked as part of a *delete*-expression. + +Sized delete is used as a critical performance optimization, eliminating the +need to perform a costly pointer-to-size lookup. + +### Extensions + +We also expose a prototype of +[P0901](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0901r5.html) in +https://github.com/google/tcmalloc/blob/master/tcmalloc/malloc_extension.h with +`tcmalloc_size_returning_operator_new()`. This returns both memory and the size +of the allocation in bytes. It can be freed with `::operator delete`. + +## C API + +The C standard library specifies the API for dynamic memory management within +the `` header file. Implementations require C11 or greater. + +TCMalloc provides implementation for the following C API functions: + +* `malloc()` +* `calloc()` +* `realloc()` +* `free()` +* `aligned_alloc()` + +For `malloc`, `calloc`, and `realloc`, we obey the behavior of C90 DR075 and +[DR445](http://www.open-std.org/jtc1/sc22/wg14/www/docs/summary.htm#dr_445) +which states: + + The alignment requirement still applies even if the size is too small for + any object requiring the given alignment. + +In other words, `malloc(1)` returns `alignof(std::max_align_t)`-aligned pointer. +Based on the progress of +[N2293](http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2293.htm), we may relax +this alignment in the future. + +Additionally, TCMalloc provides an implementation for the following POSIX +standard library function, available within glibc: + +* `posix_memalign()` + +TCMalloc also provides implementations for the following obsolete functions +typically provided within libc implementations: + +* `cfree()` +* `memalign()` +* `valloc()` +* `pvalloc()` + +Documentation is not provided for these obsolete functions. The implementations +are provided only for compatibility purposes. + +### `malloc()` + +``` +void* malloc(size_t size); +``` + +`malloc` allocates `size` bytes of memory and returns a `void *` pointer to the +start of that memory. + +`malloc(0)` returns a non-NULL zero-sized pointer. (Attempting to access memory +at this location is undefined.) If `malloc()` fails for some reason, it returns +NULL. + +### `calloc()` + +``` +void* calloc(size_t num, size_t size); +``` + +`calloc()` allocates memory for an array of objects, zero-initializes all bytes +in allocated storage, and if allocation succeeds, returns a pointer to the first +byte in the allocated memory block. 
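+
+A minimal sketch of the zero-initialization behavior described above (plain
+standard-library usage; nothing here is specific to TCMalloc):
+
+```
+#include <cassert>
+#include <cstdlib>
+
+int main() {
+  // Request space for 16 ints; calloc() zero-initializes every byte.
+  int* counts = static_cast<int*>(std::calloc(16, sizeof(int)));
+  if (counts == nullptr) return 1;  // NULL is returned on failure.
+  assert(counts[15] == 0);
+  std::free(counts);
+  return 0;
+}
+```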
+ +`calloc(num, 0)` or `calloc(0, size)` returns a non-NULL zero-sized pointer. +(Attempting to access memory at this location is undefined.) If `calloc()` fails +for some reason, it returns NULL. + +### `realloc()` + +``` +void* realloc(void *ptr, size_t new_size); +``` + +`realloc()` re-allocates memory for an existing region of memory by either +expanding or contracting the memory based on the passed `new_size` in bytes, +returning a `void*` pointer to the start of that memory (which may not change); +it does not perform any initialization of new areas of memory. + +`realloc(OBJ*, 0)` returns a NULL pointer. If `realloc()` fails for some reason, +it also returns NULL. + +### `aligned_alloc()` + +``` +void* aligned_alloc(size_t alignment, size_t size); +``` + +`aligned_alloc()` allocates `size` bytes of memory with alignment of size +`alignment` and returns a `void *` pointer to the start of that memory; it does +not perform any initialization. + +The `size` parameter must be an integral multiple of `alignment` and `alignment` +must be a power of two. If either of these cases is not satisfied, +`aligned_alloc()` will fail and return a NULL pointer. + +`aligned_alloc` with `size=0` returns a non-NULL zero-sized pointer. +(Attempting to access memory at this location is undefined.) + +### `posix_memalign()` + +``` +int posix_memalign(void **memptr, size_t alignment, size_t size); +``` + +`posix_memalign()`, like `aligned_alloc()` allocates `size` bytes of memory with +alignment of size `alignment` to the start of memory pointed to by `**memptr`; +it does not perform any initialization. This pointer can be cast to the desired +type of data pointer in order to be dereferenceable. If the alignment allocation +succeeds, `posix_memalign()` returns `0`; otherwise it returns an error value. + +`posix_memalign` is similar to `aligned_alloc()` but `alignment` be a power of +two multiple of `sizeof(void *)`. If the constraints are not satisfied, +`posix_memalign()` will fail. + +`posix_memalign` with `size=0` returns a non-NULL zero-sized pointer. +(Attempting to access memory at this location is undefined.) + +### `free()` + +``` +void free(void* ptr); +``` + +`free()` deallocates memory previously allocated by `malloc()`, `calloc()`, +`aligned_alloc()`, `posix_memalign()`, or `realloc()`. If `free()` is passed a +null pointer, the function does nothing. + +### Extensions + +These are contained in +https://github.com/google/tcmalloc/blob/master/tcmalloc/malloc_extension.h. + +* `nallocx(size_t size, int flags)` - Returns the number of bytes that would + be allocated by `malloc(size)`, subject to the alignment specified in + `flags`. +* `sdallocx(void* ptr, size_t size, int flags)` - Deallocates memory allocated + by `malloc` or `memalign`. It takes a size parameter to pass the original + allocation size, improving deallocation performance. diff --git a/docs/sampling.md b/docs/sampling.md new file mode 100644 index 000000000..e46414879 --- /dev/null +++ b/docs/sampling.md @@ -0,0 +1,57 @@ +# How sampling in TCMalloc works. + +## Introduction + +TCMalloc uses sampling to get representative data on memory usage and +allocation. How this works is not well documented. This doc attempts to at least +partially fix this. + +## Sampling + +We chose to sample an allocation every N bytes where N is a +[random value](https://github.com/google/tcmalloc/blob/master/tcmalloc/sampler.cc) +with a mean set by the +[profile sample rate](https://github.com/google/tcmalloc/blob/master/tcmalloc/malloc_extension.h). 
+By default this is every +[2MiB](https://github.com/google/tcmalloc/blob/master/tcmalloc/common.h). + +## How We Sample Allocations + +When we +[pick an allocation](https://github.com/google/tcmalloc/blob/master/tcmalloc/sampler.cc) +to sample we do some +[additional processing around that allocation](https://github.com/google/tcmalloc/blob/master/tcmalloc/tcmalloc.cc) - +recording stack, alignment, request size, and allocation size. Then we go +[through all the active samplers](https://github.com/google/tcmalloc/blob/master/tcmalloc/tcmalloc.cc) +and tell them about the allocation. We also tell the +[span that we're sampling it](https://github.com/google/tcmalloc/blob/master/tcmalloc/tcmalloc.cc) - +we can do this because we do sampling at tcmalloc page sizes, so each sample +corresponds to a particular page in the pagemap. + +## How We Free Sampled Objects + +Each sampled allocation is tagged. So we can +quickly[ test whether a particular allocation might be a sample](https://github.com/google/tcmalloc/blob/master/tcmalloc/tcmalloc.cc). + +When we are done with the sampled span +[we release it](https://github.com/google/tcmalloc/blob/master/tcmalloc/span.cc). + +## How Do We Handle Heap and Fragmentation Profiling + +To handle +[heap](https://github.com/google/tcmalloc/blob/master/tcmalloc/tcmalloc.cc) +and +[fragmentation](https://github.com/google/tcmalloc/blob/master/tcmalloc/tcmalloc.cc) +profiling we just need to traverse the list of sampled objects and compute +either their degree of fragmentation, or the amount of heap they consume. + +## How Do We Handle Allocation Profiling + +Allocation profiling reports a list of sampled allocations during a length of +time. We start an +[allocation profile](https://github.com/google/tcmalloc/blob/master/tcmalloc/malloc_extension.h), +then wait until time has elapsed, then call `Stop` on the token. and report the +profile. + +While the allocation sampler is active it is added to the list of samplers for +allocations and removed from the list when it is claimed. diff --git a/docs/stats.md b/docs/stats.md new file mode 100644 index 000000000..db77dc72b --- /dev/null +++ b/docs/stats.md @@ -0,0 +1,720 @@ +# Understanding Malloc Stats + +## Getting Malloc Stats + +Human-readable statistics can be obtained by calling +`tcmalloc::MallocExtension::GetStats()`. + +## Understanding Malloc Stats Output + +### It's A Lot Of Information + +The output contains a lot of information. Much of it can be considered debug +info that's interesting to folks who are passingly familiar with the internals +of TCMalloc, but potentially not that useful for most people. 
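+
+The statistics discussed in the remainder of this document can be dumped at
+runtime with a call along these lines (a minimal sketch; it assumes the
+`tcmalloc/malloc_extension.h` header from this repository and that
+`GetStats()` returns a printable string, as noted above):
+
+```
+#include <iostream>
+#include <string>
+
+#include "tcmalloc/malloc_extension.h"
+
+int main() {
+  // Dump the human-readable statistics described in the rest of this doc.
+  std::string stats = tcmalloc::MallocExtension::GetStats();
+  std::cout << stats << std::endl;
+  return 0;
+}
+```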
+ +### Summary Section + +The most generally useful section is the first few lines: + +``` +------------------------------------------------ +MALLOC: 16709337136 (15935.3 MiB) Bytes in use by application +MALLOC: + 503480320 ( 480.2 MiB) Bytes in page heap freelist +MALLOC: + 363974808 ( 347.1 MiB) Bytes in central cache freelist +MALLOC: + 120122560 ( 114.6 MiB) Bytes in per-CPU cache freelist +MALLOC: + 415232 ( 0.4 MiB) Bytes in transfer cache freelist +MALLOC: + 76920 ( 0.1 MiB) Bytes in thread cache freelists +MALLOC: + 52258953 ( 49.8 MiB) Bytes in malloc metadata +MALLOC: ------------ +MALLOC: = 17749665929 (16927.4 MiB) Actual memory used (physical + swap) +MALLOC: + 333905920 ( 318.4 MiB) Bytes released to OS (aka unmapped) +MALLOC: ------------ +MALLOC: = 18083571849 (17245.8 MiB) Virtual address space used +``` + +* **Bytes in use by application:** Number of bytes that the application is + actively using to hold data. This is computed by the bytes requested from + the OS minus any bytes that are held in caches and other internal data + structures. +* **Bytes in page heap freelist:** The pageheap is a structure that holds + memory ready for TCMalloc to use it. This memory is not actively being used, + and could be returned to the OS. [See TCMalloc tuning](tuning.md) +* **Bytes in central cache freelist:** This is the amount of memory currently + held in the central freelist. This is a structure that holds partially used + "[spans](/third_party/tcmalloc/g3doc/stats.md#more-detail-on-metadata)" of + memory. The spans are partially used because some memory has been allocated + from them, but not entirely used - since they have some free memory on them. +* **Bytes in per-CPU cache freelist:** In per-cpu mode (which is the default) + each CPU holds some memory ready to quickly hand to the application. The + maximum size of this per-cpu cache is tunable. + [See TCMalloc tuning](tuning.md) +* **Bytes in transfer cache freelist:** The transfer cache is can be + considered another part of the central freelist. It holds memory that is + ready to be provided to the application for use. +* **Bytes in thread cache freelists:** The TC in TCMalloc stands for thread + cache. Originally each thread held its own cache of memory to provide to the + application. Since the change of default the thread caches are used by very + few applications. However, TCMalloc starts in per-thread mode, so there may + be some memory left in per-thread caches from before it switches into + per-cpu mode. +* **Bytes in malloc metadata:** the size of the data structures used for + tracking memory allocation. This will grow as the amount of memory used + grows. + +There's a couple of summary lines: + +* **Actual memory used:** This is the total amount of memory that TCMalloc + thinks it is using in the various categories. This is computed from the size + of the various areas, the actual contribution to RSS may be larger or + smaller than this value. The true RSS may be less if memory is not mapped + in. In some cases RSS can be larger if small regions end up being mapped + with huge pages. This does not count memory that TCMalloc is not aware of + (eg memory mapped files, text segments etc.) +* **Bytes released to OS:** TCMalloc can release memory back to the OS (see + [tcmalloc tuning](tuning.md)), and this is the upper bound on the amount of + released memory. However, it is up to the OS as to whether the act of + releasing the memory actually reduces the RSS of the application. 
The code + uses MADV_DONTNEED which tells the OS that the memory is no longer needed, + but does not actually cause it to be physically removed. +* **Virtual address space used:** This is the amount of virtual address space + that TCMalloc believes it is using. This should match the later section on + requested memory. There are other ways that an application can increase its + virtual address space, and this statistic does not capture them. + +### More Detail On Metadata + +The next section gives some insight into the amount of metadata that TCMalloc is +using. This is really debug information, and not very actionable. + +``` +MALLOC: 236176 Spans in use +MALLOC: 238709 ( 10.9 MiB) Spans created +MALLOC: 8 Thread heaps in use +MALLOC: 46 ( 0.0 MiB) Thread heaps created +MALLOC: 13517 Stack traces in use +MALLOC: 13742 ( 7.2 MiB) Stack traces created +MALLOC: 0 Table buckets in use +MALLOC: 2808 ( 0.0 MiB) Table buckets created +MALLOC: 11665416 ( 11.1 MiB) Pagemap bytes used +MALLOC: 4067336 ( 3.9 MiB) Pagemap root resident bytes +``` + +* **Spans:** structures that hold multiple + [pages](/third_party/tcmalloc/g3doc/stats.md#page-sizes) of allocatable + objects. +* **Thread heaps:** These are the per-thread structures used in per-thread + mode. +* **Stack traces:** These hold metadata for each sampled object. +* **Table buckets:** These hold data for stack traces for sampled events. +* **Pagemap:** This data structure supports the mapping of object addresses to + information about the objects held on the page. The pagemap root is a + potentially large array, and it is useful to know how much is actually + memory resident. + +### Page Sizes + +There are three relevant "page" sizes for systems and TCMalloc. It's important +to be able to disambiguate them. + +* **System default page size:** this is not reported by TCMalloc. This is 4KiB + on x86. It's not referred to in TCMalloc, and it's not important, but it's + important to know that it is different from the sizes of pages used in + TCMalloc. +* **TCMalloc page size:** This is the basic unit of memory management for + TCMalloc. Objects on the same page are the same number of bytes in size. + Internally TCMalloc manages memory in chunks of this size. TCMalloc supports + 4 sizes: 4KiB (small but slow), 8KiB (the default), 32 KiB (large), 256 KiB + (256 KiB pages). There's trade-offs around the page sizes: + * Smaller page sizes are more memory efficient because we have less + fragmentation (ie left over space) when trying to provide the requested + amount of memory using 4KiB chunks. It's also more likely that all the + objects on a 4KiB page will be freed allowing the page to be returned + and used for a different size of data. + * Larger pages result in fewer fetches from the page heap to provide a + given amount of memory. They also keep memory of the same size in closer + proximity. +* **TCMalloc hugepage size:** This is the size of a hugepage on the system, + for x86 this is 2MiB. This size is used as a unit of management by + temeriare, but not used by the pre-temeraire pageheap. + +``` +MALLOC: 32768 Tcmalloc page size +MALLOC: 2097152 Tcmalloc hugepage size +``` + +### Experiments + +There is an experiment framework embedded into TCMalloc. +The enabled experiments are reported as part of the statistics. 
+ +``` +MALLOC EXPERIMENTS: TCMALLOC_TEMERAIRE=0 TCMALLOC_TEMERAIRE_WITH_SUBRELEASE_V3=0 +``` + +### Actual Memory Footprint + +The output also reports the memory size information recorded by the OS: + +* Bytes resident is the amount of physical memory in use by the application + (RSS). This includes things like program text which is excluded from the + information that TCMalloc presents. +* Bytes mapped is the size of the virtual address space in use by the + application (VSS). This can be substantially larger than the virtual memory + reported by TCMalloc as applications can increase VSS in other ways. It's + also not that useful as a metric since the VSS is a limit to the RSS, but + not directly related to the amount of physical memory that the application + uses. + +``` +Total process stats (inclusive of non-malloc sources): +TOTAL: 86880677888 (82855.9 MiB) Bytes resident (physical memory used) +TOTAL: 89124790272 (84996.0 MiB) Bytes mapped (virtual memory used) +``` + +### Per Class Size Information + +Requests for memory are rounded to convenient sizes. For example a request for +15 bytes could be rounded to 16 bytes. These sizes are referred to as class +sizes. There are various caches in TCMalloc where memory gets held, and the per +size class section reports how much memory is being used by cached objects of +each size. The columns reported for each class size are: + +* The class size +* The size of each object in that class size. +* The number of objects of that size currently held in the per-cpu, + per-thread, transfer, and central caches. +* The total size of those objects in MiB - ie size of each object multiplied + by the number of objects. +* The cumulative size of that class size plus all smaller class sizes. + +``` +Total size of freelists for per-thread and per-CPU caches, +transfer cache, and central cache, by size class +------------------------------------------------ +class 1 [ 8 bytes ] : 413460 objs; 3.2 MiB; 3.2 cum MiB +class 2 [ 16 bytes ] : 103410 objs; 1.6 MiB; 4.7 cum MiB +class 3 [ 24 bytes ] : 525973 objs; 12.0 MiB; 16.8 cum MiB +class 4 [ 32 bytes ] : 275250 objs; 8.4 MiB; 25.2 cum MiB +class 5 [ 40 bytes ] : 1047443 objs; 40.0 MiB; 65.1 cum MiB +... +``` + +### Per-CPU Information + +If the per-cpu cache is enabled then we get a report of the memory currently +being cached on each CPU. + +The first number reported is the maximum size of the per-cpu cache on each CPU. +This corresponds to the parameter `MallocExtension::GetMaxPerCpuCacheSize()`, +which defaults to 3MiB. [See tuning](tuning.md) + +The following columns are reported for each CPU: + +* The cpu ID +* The total size of the objects held in the CPU's cache in bytes. +* The total size of the objects held in the CPU's cache in MiB. +* The total number of unallocated bytes. + +The concept of unallocated bytes needs to be explained because the definition is +not obvious. + +The per-cpu cache is an array of pointers to available memory. Each class size +has a number of entries that it can use in the array. These entries can be used +to hold memory, or be empty. + +To control the maximum memory that the per-cpu cache can use we sum up the +number of slots that can be used by a size class multiplied by the size of +objects in that size class. This gives us the total memory that could be held in +the cache. This is not what is reported by unallocated memory. 
+ +Unallocated memory is the amount of memory left over from the per cpu limit +after we have subtracted the total memory that could be held in the cache. + +The in use memory is calculated from the sum of the number of populated entries +in the per-cpu array multiplied by the size of the objects held in those +entries. + +To summarise, the per-cpu limit (which is reported before the per-cpu data) is +equal to the number of bytes in use (which is reported in the second column) +plus the number of bytes that could be used (which is not reported) plus the +unallocated "spare" bytes (which is reported as the last column). + +``` +Bytes in per-CPU caches (per cpu limit: 3145728 bytes) +------------------------------------------------ +cpu 0: 2168200 bytes ( 2.1 MiB) with 52536 bytes unallocated active +cpu 1: 1734880 bytes ( 1.7 MiB) with 258944 bytes unallocated active +cpu 2: 1779352 bytes ( 1.7 MiB) with 8384 bytes unallocated active +cpu 3: 1414224 bytes ( 1.3 MiB) with 112432 bytes unallocated active +cpu 4: 1260016 bytes ( 1.2 MiB) with 179800 bytes unallocated +... +``` + +Some CPU caches may be marked `active`, indicating that the process is currently +runnable on that CPU. + +### Pageheap Information + +The pageheap holds pages of memory that are not currently being used either by +the application or by TCMalloc's internal caches. These pages are grouped into +spans - which are ranges of contiguous pages, and these spans can be either +mapped (backed by physical memory) or unmapped (not necessarily backed by +physical memory). + +Memory from the pageheap is used either to replenish the per-thread or per-cpu +caches to to directly satisfy requests that are larger than the sizes supported +by the per-thread or per-cpu caches. + +**Note:** TCMalloc cannot tell whether a span of memory is actually backed by +physical memory, but it uses _unmapped_ to indicate that it has told the OS that +the span is not used and does not need the associated physical memory. For this +reason the physical memory of an application may be larger that the amount that +TCMalloc reports. + +The pageheap section contains the following information: + +* The first line reports the number of sizes of spans, the total memory that + these spans cover, and the total amount of that memory that is unmapped. +* The size of the span in number of pages. +* The number of spans of that size. +* The total memory consumed by those spans in MiB. +* The cumulative total memory held in spans of that size and fewer pages. +* The amount of that memory that has been unmapped. +* The cumulative amount of unmapped memory for spans of that size and smaller. + +``` +PageHeap: 30 sizes; 480.1 MiB free; 318.4 MiB unmapped +------------------------------------------------ + 1 pages * 341 spans ~ 10.7 MiB; 10.7 MiB cum; unmapped: 1.9 MiB; 1.9 MiB cum + 2 pages * 469 spans ~ 29.3 MiB; 40.0 MiB cum; unmapped: 0.0 MiB; 1.9 MiB cum + 3 pages * 462 spans ~ 43.3 MiB; 83.3 MiB cum; unmapped: 3.3 MiB; 5.2 MiB cum + 4 pages * 119 spans ~ 14.9 MiB; 98.2 MiB cum; unmapped: 0.1 MiB; 5.3 MiB cum +... +``` + +### Pageheap Cache Age + +The next section gives some indication of the age of the various spans in the +pageheap. Live (ie backed by physical memory) and unmapped spans are reported +separately. + +The columns indicate roughly how long the span has been in the pageheap, ranging +from less than a second to more than 8 hours. 
+ +``` +------------------------------------------------ +PageHeap cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time) +------------------------------------------------ + mean <1s 1s 30s 1m 30m 1h 8+h +Live span TOTAL PAGES: 9.1 533 13322 26 1483 0 0 0 +Live span, 1 pages: 7.4 0 256 0 24 0 0 0 +Live span, 2 pages: 1.6 38 900 0 0 0 0 0 +… +Unmapped span TOTAL PAGES: 153.9 153 2245 1801 5991 0 0 0 +Unmapped span, 1 pages: 34.6 0 35 15 11 0 0 0 +Unmapped span, 3 pages: 28.4 0 60 42 3 0 0 0 +... +``` + +### Pageheap Allocation Summary + +This reports some stats on the number of pages allocated. + +* The number of live (ie not on page heap) pages that were "small" + allocations. Small allocations are ones that are tracked in the pageheap by + size (eg a region of two pages in size). Larger allocations are just kept in + an array that has to be scanned linearly. +* The pages of slack result from situations where allocation is rounded up to + hugepages, and this leaves some spare pages. +* The largest seen allocation is self explanatory. + +``` +PageHeap: stats on allocation sizes +PageHeap: 344420 pages live small allocation +PageHeap: 12982 pages of slack on large allocations +PageHeap: largest seen allocation 29184 pages +``` + +### Pageheap Per Number Of Pages In Range + +This starts off reporting the activity for small ranges of pages, but at the end +of the list starts aggregating information for groups of page ranges. + +* The first column contains the number of pages (or the range of pages if the + bucket is wider than a single page). +* The second and third columns are the number of allocated and freed pages we + have seen of this size. +* The fourth column is the number of live allocations of this size. +* The fifth column is the size of those live allocations in MiB. +* The sixth column is the allocation rate in pages per second since the start + of the application. +* The seventh column is the allocation rate in MiB per second since the start + of the application. + +``` +PageHeap: per-size information: +PageHeap: 1 page info: 23978897 / 23762891 a/f, 216006 (6750.2 MiB) live, 2.43e+03 allocs/s ( 76.1 MiB/s) +PageHeap: 2 page info: 21442844 / 21436331 a/f, 6513 ( 407.1 MiB) live, 2.18e+03 allocs/s (136.0 MiB/s) +PageHeap: 3 page info: 2333686 / 2329225 a/f, 4461 ( 418.2 MiB) live, 237 allocs/s ( 22.2 MiB/s) +PageHeap: 4 page info: 21509168 / 21508751 a/f, 417 ( 52.1 MiB) live, 2.18e+03 allocs/s (272.9 MiB/s) +PageHeap: 5 page info: 3356076 / 3354188 a/f, 1888 ( 295.0 MiB) live, 341 allocs/s ( 53.2 MiB/s) +PageHeap: 6 page info: 1718534 / 1718486 a/f, 48 ( 9.0 MiB) live, 174 allocs/s ( 32.7 MiB/s) +... +``` + +### GWP-ASan Status + +The GWP-ASan section displays information about allocations guarded by GWP-ASan. + +* The number of successful and failed GWP-ASan allocations. If there are 0 + successful and 0 failed allocations, GWP-ASan is probably disabled on your + binary. If there are a large number of failed allocations, it probably means + your sampling rate is too high, causing the guarded slots to be exhausted. +* The number of "slots" currently allocated and quarantined. An allocated slot + contains an allocation that is still active (i.e. not freed) while a + quarantined slot has either not been used yet or contains an allocation that + was freed. +* The maximum number of slots that have been allocated at the same time. This + number is printed along with the allocated slot limit. 
If the maximum slots + allocated matches the limit, you may want to reduce your sampling rate to + avoid failed GWP-ASan allocations. + +``` +------------------------------------------------ +GWP-ASan Status +------------------------------------------------ +Successful Allocations: 1823 +Failed Allocations: 0 +Slots Currently Allocated: 33 +Slots Currently Quarantined: 95 +Moximum Slots Allocated: 51 / 64 +``` + +### Memory Requested From The OS + +The stats also report the amount of memory requested from the OS by mmap. + +Memory is also requested, but may not actually be backed by physical memory, so +these stats should resemble the VSS of the application, not the RSS. + +``` +Low-level allocator stats: +MmapSysAllocator: 18083741696 bytes (17246.0 MiB) allocated +``` + +## Temeraire + +### Introduction + +Temeraire (or Huge Page Aware Allocator) is a new page heap for TCMalloc that is +hugepage aware. It is designed to better handle memory backed by hugepages - +avoiding breaking them up. Since it is more elaborate code, it reports +additional information. + +### Summary Statistics + +The initial set of statistics from the Huge Page Aware Allocator are similar to +the old page heap, and show a summary of the number of instances of each range +of contiguous pages. + +``` +------------------------------------------------ +HugePageAware: 75 sizes; 938.8 MiB free; 1154.0 MiB unmapped +------------------------------------------------ + 1 pages * 86655 spans ~ 677.0 MiB; 677.0 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum + 2 pages * 3632 spans ~ 56.8 MiB; 733.7 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum + 3 pages * 288 spans ~ 6.8 MiB; 740.5 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum + 4 pages * 250 spans ~ 7.8 MiB; 748.3 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum +... +``` + +The first line indicates the number of different sizes of ranges, the total MiB +available, and the total MiB of unmapped ranges. The next lines are per number +of continuous pages: + +* The number of contiguous pages +* The number of spans of that number of pages +* The total number of MiB of that span size that are mapped. +* The cumulative total of the mapped pages. +* The total number of MiB of that span size that are unmapped. +* The cumulative total of the unmapped pages. + +### Per Component Information + +The Huge Page Aware Allocator has multiple places where pages of memory are +held. More details of its workings can be found in this document. There are four +caches where pages of memory can be located: + +* The filler, used for allocating ranges of a few TCMalloc pages in size. +* The region cache, used for allocating ranges of multiple pages. +* The huge cache which contains huge pages that are backed with memory. +* The huge page allocator which contains huge pages that are not backed by + memory. + +We get some summary information for the various caches, before we report +detailed information for each of the caches. 
+ +``` +Huge page aware allocator components: +------------------------------------------------ +HugePageAware: breakdown of free / unmapped / used space: +HugePageAware: filler 38825.2 MiB used, 938.8 MiB free, 0.0 MiB unmapped +HugePageAware: region 0.0 MiB used, 0.0 MiB free, 0.0 MiB unmapped +HugePageAware: cache 908.0 MiB used, 0.0 MiB free, 0.0 MiB unmapped +HugePageAware: alloc 0.0 MiB used, 0.0 MiB free, 1154.0 MiB unmapped +``` + +The summary information tells us: + +* The first column shows how much memory has been allocated from each of the + caches +* The second column indicates how much backed memory is available in each + cache. +* The third column indicates how much unmapped memory is available in each + cache. + +### Filler Cache + +The filler cache contains TCMalloc sized pages from within a single hugepage. So +if we want a single TCMalloc page we will look for it in the filler. + +There are two sections of stats around the filler cache. The first section gives +an indication of the number and state of the hugepages in the filler cache. + +``` +HugePageFiller: densely pack small requests into hugepages +HugePageFiller: 19882 total, 3870 full, 16012 partial, 0 released, 0 quarantined +HugePageFiller: 120168 pages free in 19882 hugepages, 0.0236 free +HugePageFiller: among non-fulls, 0.0293 free +HugePageFiller: 0 hugepages partially released, nan released +HugePageFiller: 1.0000 of used pages hugepageable +``` + +The summary stats are as follows: + +* Total pages is the number of hugepages in the filler cache. +* Full is the number of hugepages on that have multiple in-use allocations. +* Partial is the remaining number of hugepages that have a single in-use + allocation. +* Released is the number of hugepages that are released - ie partially + unmapped. +* Quarantined is a feature has been disabled, so the result is currently zero. + +The second section gives an indication of the number of pages in various states +in the filler cache. + +``` +HugePageFiller: fullness histograms + +HugePageFiller: # of regular hps with a<= # of free pages = 64 and < 80. +* There are 6 regular hugepages with a longest contiguous length of exactly 1 + page. +* There are 2 regular hugepages with between 81 and 96 allocations. + +The three tracker types are "regular," "donated," and "released." "Regular" is +by far the most common, and indicates regular memory in the filler. + +"Donated" is hugepages that have been donated to the filler from the tail of +large (multi-hugepage) allocations, so that the leftover space can be packed +with smaller allocations. But we prefer to use up all useable regular hugepages +before touching the donated ones, which devolve to "regular" type once they are +used. Because of this last property, donated hugepages always have only one +allocation and their longest range equals their free space, so those histograms +aren't shown. + +"Released" is partially released hugepages. Normally the entirety of a hugepage +is backed by real RAM, but in partially released hugepages most of it has been +returned to the OS. Because this defeats the primary goal of the hugepage-aware +allocator, this is done rarely, and we only reuse partially-released hugepages +for new allocations as a last resort. + +### Region Cache + +The region cache holds a chunk of memory from which can be allocated spans of +multiple TCMalloc pages. The region cache may not be populated, and it can +contain multiple regions. 
+ +``` +HugeRegionSet: 1 MiB+ allocations best-fit into 1024 MiB slabs +HugeRegionSet: 0 total regions +HugeRegionSet: 0 hugepages backed out of 0 total +HugeRegionSet: 0 pages free in backed region, nan free +``` + +The lines of output indicate: + +* The size of each region in MiB - this is currently 1GiB. +* The total number of regions in the region cache, in the example above there + are no regions in the cache. +* The number of backed hugepages in the cache out of the total number of + hugepages in the region cache. +* The number of free TCMalloc pages in the regions, and as a ratio of the + number of backed pages. + +### Huge Cache + +The huge cache contains backed hugepages, it grows and shrinks in size depending +on runtime conditions. Attempting to hold onto backed memory ready to be +provided for the application. + +``` +HugeCache: contains unused, backed hugepage(s) +HugeCache: 0 / 10 hugepages cached / cache limit (0.053 hit rate, 0.436 overflow rate) +HugeCache: 88880 MiB fast unbacked, 6814 MiB periodic +HugeCache: 1234 MiB*s cached since startup +HugeCache: recent usage range: 40672 min - 40672 curr - 40672 max MiB +HugeCache: recent offpeak range: 0 min - 0 curr - 0 max MiB +HugeCache: recent cache range: 0 min - 0 curr - 0 max MiB +``` + +The output shows the following information: + +* The number of hugepages out of the maximum number of hugepages we will hold + in the huge cache. The hit rate is how often we get pages from the huge + cache vs getting them from the huge allocator. The overflow rate is the + number of times we added something to the huge cache causing it to exceed + its size limit. +* The fast unbacked is the cumulative amount of memory unbacked due size + limitations, the periodic count is the cumulative amount of memory unbacked + by periodic calls to release unused memory. +* The amount of cumulative memory stored in HugeCache since the startup of the + process. In other words, the area under the cached-memory-vs-time curve. +* The usage range is the range minimum, current, maximum in MiB of memory + obtained from the huge cache. +* The off-peak range is the minimum, current, maximum cache size in MiB + compared to the peak cache size. +* The recent range is the minimum, current, maximum size of memory in MiB in + the huge cache. + +### Huge Allocator + +The huge allocator holds unmapped memory ranges. We allocate from here if we are +unable to allocate from any of the caches. + +``` +HugeAllocator: contiguous, unbacked hugepage(s) +HugeAddressMap: treap 5 / 10 nodes used / created +HugeAddressMap: 256 contiguous hugepages available +HugeAllocator: 20913 requested - 20336 in use = 577 hugepages free +``` + +The information reported here is: + +* The number of nodes used and created to handle regions of memory. +* The size of the longest contiguous region of available hugepages. +* The number of hugepages requested from the system, the number of hugepages + in used, and the number of hugepages available in the cache. + +### Pageheap Summary Information + +The new pageheap reports some summary information: + +``` +HugePageAware: stats on allocation sizes +HugePageAware: 4969003 pages live small allocation +HugePageAware: 659 pages of slack on large allocations +HugePageAware: largest seen allocation 45839 pages +``` + +These are: + +* The number of live "small" TCMalloc pages allocated (these less than 2MiB in + size). 
+ [Note: the 2MiB size distinction is separate from the size of hugepages] +* The number of TCMalloc pages which are left over from "large" allocations. + These allocations are larger than 2MiB in size, and are rounded to a + hugepage - the slack being the amount left over after rounding. +* The largest seen allocation request in TCMalloc pages. + +### Per Size Range Info: + +The per size range info is the same format as the old pageheap: + +* The first column contains the number of pages (or the range of pages if the + bucket is wider than a single page). +* The second and third columns are the number of allocated and freed pages we + have seen of this size. +* The fourth column is the number of live allocations of this size. +* The fifth column is the size of those live allocations in MiB. +* The sixth column is the allocation rate in pages per second since the start + of the application. +* The seventh column is the allocation rate in MiB per second since the start + of the application. + +``` +HugePageAware: per-size information: +HugePageAware: 1 page info: 5817510 / 3863506 a/f, 1954004 (15265.7 MiB) live, 16 allocs/s ( 0.1 MiB/s) +HugePageAware: 2 page info: 1828473 / 1254096 a/f, 574377 ( 8974.6 MiB) live, 5.03 allocs/s ( 0.1 MiB/s) +HugePageAware: 3 page info: 1464568 / 1227253 a/f, 237315 ( 5562.1 MiB) live, 4.03 allocs/s ( 0.1 MiB/s) +... +``` + +### Pageheap Age Information: + +The new pageheap allocator also reports information on the age of the various +page ranges. In this example you can see that there was a large number of +unmapped pages in the last minute. + +``` +------------------------------------------------ +HugePageAware cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time) +------------------------------------------------ + mean <1s 1s 30s 1m 30m 1h 8+h +Live span TOTAL PAGES: 29317.6 145 549 1775 13059 13561 58622 32457 +Live span, 1 pages: 35933.7 0 55 685 6354 8111 43853 27597 +... +Unmapped span TOTAL PAGES: 51.3 0 0 131072 16640 0 0 0 +Unmapped span, >=64 pages: 51.3 0 0 131072 16640 0 0 0 +... +``` + diff --git a/docs/tuning.md b/docs/tuning.md new file mode 100644 index 000000000..c7f72d573 --- /dev/null +++ b/docs/tuning.md @@ -0,0 +1,131 @@ +# Performance Tuning TCMalloc + +There are three user accessible controls that we can use to performance tune +TCMalloc: + +* The logical page size for TCMalloc (4KiB, 8KiB, 32KiB, 256KiB) +* The per-thread or per-cpu cache sizes +* The rate at which memory is released to the OS + +None of these tuning parameters are clear wins, otherwise they would be the +default. We'll discuss the advantages and disadvantages of changing them. + +## The Logical Page Size for TCMalloc: + +This is determined at compile time by linking in the appropriate version of +TCMalloc. The page size indicates the unit in which TCMalloc manages memory. The +default is in 8KiB chunks, there are larger options of 32KiB and 256KiB. There +is also the 4KiB page size used by the small-but-slow allocator. + +A smaller page size allows TCMalloc to provide memory to an application with +less waste. Waste comes about through two issues: + +* Left-over memory when rounding larger requests to the page size (eg a + request for 62 KiB might get rounded to 64 KiB). +* Pages of memory that are stuck because they have a single in use allocation + on the page, and therefore cannot be repurposed to hold a different size of + allocation. + +The second of these points is worth elucidating. 
For small allocations TCMalloc +will fit multiple objects onto a single page. + +So if you request 512 bytes, then an entire page will be devoted to 512 byte +objects. If the size of that page is 4KiB we get 8 objects, if the size of that +page is 256KiB we get 512 objects. That page can only be used for 512 byte +objects until all the objects on the page have been freed. + +If you have 8 objects on a page, there's a reasonable chance that all 8 will +become free at the same time, and we can repurpose the page for objects of a +different size. If there's 512 objects on that page, then it is very unlikely +that all the objects will become freed at the same time, so that page will +probably never become entirely free and will probably hang around, potentially +containing only a few in-use objects. + +The consequence of this is that large pages tend to lead to a larger memory +footprint. There's also the issue that if you want one object of a size, you +need to allocate a whole page. + +The advantage of managing objects using larger page sizes are: + +* Objects of the same size are better clustered in memory. If you need 512 KiB + of 8 byte objects, then that's two 256 KiB pages, or 128 x 4 KiB pages. If + memory is largely backed by hugepages, then with large pages in the worst + case we can map the entire demand with two large pages, whereas small pages + could take up to 128 entries in the TLB. +* There's a structure called the `PageMap` which enables TCMalloc to lookup + information about any allocated memory. If we use large pages the pagemap + needs fewer entries and can be much smaller. This makes it more likely that + it is cache resident. However, sized delete substantially reduced the number + of times that we need to consult the pagemap, so the benefit from larger + pages is reduced. + +**Suggestion:** The default of 8KiB page sizes is probably good enough for most +applications. However, if an application has a heap measured in GiB it may be +worth looking at using large page sizes. + +**Suggestion:** Consider small-but-slow if it is more important to minimise +memory footprint over performance. + +**Note:** Class sizes are determined on a per-page-size basis. So changing the +page size will implicitly change the class sizes used. Class sizes are selected +to be memory-efficient for the applications using that page size. If an +application changes page size, there may be a performance or memory impact from +the different selection of class sizes. + +## Per-thread/per-cpu Cache Sizes + +The default is for TCMalloc to run in per-cpu mode as this is faster; however, +there are few applications which have not yet transitioned. The plan is to move +these across at some point soon. + +Increasing the size of the cache is an obvious way to improve performance. The +larger the cache the less frequently memory needs to be fetched from the central +caches. Returning memory from the cache is substantially faster than fetching +from the central cache. + +The size of the per-cpu caches is controlled by +`tcmalloc::MallocExtension::SetMaxPerCpuCacheSize`. This controls the limit for +each CPU, so the total amount of memory for application could be much larger +than this. Memory on CPUs where the application is no longer able to run can be +freed by calling `tcmalloc::MallocExtension::ReleaseCpuMemory`. + +In contrast `tcmalloc::MallocExtension::SetMaxTotalThreadCacheBytes` controls +the _total_ size of all thread caches in the application. 
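+
+A sketch of how these knobs might be exercised is shown below (the exact
+parameter types and units are assumptions; consult `malloc_extension.h` for
+the authoritative signatures):
+
+```
+#include "tcmalloc/malloc_extension.h"
+
+void TuneCaches() {
+  // Raise the per-CPU cache bound from the 3 MiB default to 6 MiB.
+  tcmalloc::MallocExtension::SetMaxPerCpuCacheSize(6 << 20);
+
+  // Bound the combined size of all legacy per-thread caches.
+  tcmalloc::MallocExtension::SetMaxTotalThreadCacheBytes(64 << 20);
+}
+```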
+ +**Suggestion:** The default cache size is typically sufficient, but cache size +can be increased (or decreased) depending on the amount of time spent in +TCMalloc code, and depending on the overall size of the application (a larger +application can afford to cache more memory without noticeably increasing its +overall size). + +## Memory Releasing + +`tcmalloc::MallocExtension::ReleaseMemoryToSystem` makes a request to release +`n` bytes of memory to TCMalloc. This can keep the memory footprint of the +application down to a minimal amount, however it should be considered that this +just reduces the application down from it's peak memory footprint, and does not +make that peak memory footprint smaller. + +There are two disadvantages of releasing memory aggressively: + +* Memory that is unmapped may be immediately needed, and there is a cost to + faulting unmapped memory back into the application. +* Memory that is unmapped at small granularity will break up hugepages, and + this will cause some performance loss due to increased TLB misses. + +**Note:** Release rate is not a panacea for memory usage. Jobs should be +provisioned for peak memory usage to avoid OOM errors. Setting a release rate +may enable an application to exceed the memory limit for short periods of +time without triggering an OOM. A release rate is also a good citizen behavior +as it will enable the system to use spare capacity memory for applications +which are are under provisioned. However, it is not a substitute for setting +appropriate memory requirements for the job. + +**Note:** Memory is released from the `PageHeap` and stranded per-cpu caches. +It is not possible to release memory from other internal structures, like +the `CentralFreeList`. + +**Suggestion:** The default release rate is probably appropriate for most +applications. In situations where it is tempting to set a faster rate it is +worth considering why there are memory spikes, since those spikes are likely to +cause an OOM at some point. diff --git a/tcmalloc/BUILD b/tcmalloc/BUILD new file mode 100644 index 000000000..12aca9a2f --- /dev/null +++ b/tcmalloc/BUILD @@ -0,0 +1,995 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Description: +# +# tcmalloc is a fast malloc implementation. 
+ +load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS") + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) + +config_setting( + name = "llvm", + flag_values = { + "@bazel_tools//tools/cpp:compiler": "llvm", + }, + visibility = ["//visibility:private"], +) + +NO_BUILTIN_MALLOC = [ + "-fno-builtin-malloc", + "-fno-builtin-free", +] + +overlay_deps = [ +] + +cc_library( + name = "experiment", + srcs = ["experiment.cc"], + hdrs = [ + "experiment.h", + "experiment_config.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +cc_library( + name = "percpu_tcmalloc", + hdrs = ["percpu_tcmalloc.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "//tcmalloc/internal:mincore", + "//tcmalloc/internal:percpu", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:dynamic_annotations", + ], +) + +# Dependencies required by :tcmalloc and its variants. Since :common is built +# several different ways, it should not be included on this list. +tcmalloc_deps = [ + ":experiment", + ":malloc_extension", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/debugging:leak_check", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:memory_stats", + "//tcmalloc/internal:percpu", +] + +# This library provides tcmalloc always +cc_library( + name = "tcmalloc", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "sampler.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = overlay_deps + tcmalloc_deps + [ + ":common", + ], + alwayslink = 1, +) + +# Provides tcmalloc always; use per-thread mode. +# +cc_library( + name = "tcmalloc_deprecated_perthread", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc/testing:__pkg__", + ], + deps = overlay_deps + tcmalloc_deps + [ + ":common_deprecated_perthread", + ], + alwayslink = 1, +) + +# An opt tcmalloc build with ASSERTs forced on (by turning off +# NDEBUG). Useful for tracking down crashes in production binaries. +# To use add malloc = "//tcmalloc:opt_with_assertions" in your +# target's build rule. +cc_library( + name = "opt_with_assertions", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = [ + "-O2", + "-UNDEBUG", + ] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = overlay_deps + tcmalloc_deps + [ + ":common", + ], + alwayslink = 1, +) + +cc_library( + name = "size_class_info", + hdrs = ["size_class_info.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "//tcmalloc/internal:logging", + ], +) + +# List of common source files used by the various tcmalloc libraries. 
+common_srcs = [ + "arena.cc", + "arena.h", + "central_freelist.cc", + "central_freelist.h", + "common.cc", + "common.h", + "cpu_cache.cc", + "cpu_cache.h", + "experimental_size_classes.cc", + "guarded_page_allocator.h", + "guarded_page_allocator.cc", + "huge_address_map.cc", + "huge_allocator.cc", + "huge_allocator.h", + "huge_cache.cc", + "huge_cache.h", + "huge_region.h", + "huge_page_aware_allocator.cc", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_pages.h", + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "page_allocator.cc", + "page_allocator.h", + "page_allocator_interface.cc", + "page_allocator_interface.h", + "page_heap.cc", + "page_heap.h", + "page_heap_allocator.h", + "pagemap.cc", + "pagemap.h", + "parameters.cc", + "peak_heap_tracker.cc", + "sampler.cc", + "sampler.h", + "size_classes.cc", + "span.cc", + "span.h", + "stack_trace_table.cc", + "stack_trace_table.h", + "static_vars.cc", + "static_vars.h", + "stats.cc", + "system-alloc.cc", + "system-alloc.h", + "tcmalloc.h", + "thread_cache.cc", + "thread_cache.h", + "tracking.h", + "transfer_cache.cc", + "transfer_cache.h", +] + +common_hdrs = [ + "arena.h", + "central_freelist.h", + "common.h", + "cpu_cache.h", + "guarded_page_allocator.h", + "huge_address_map.h", + "huge_allocator.h", + "tcmalloc_policy.h", + "huge_cache.h", + "huge_page_filler.h", + "huge_pages.h", + "huge_region.h", + "huge_page_aware_allocator.h", + "libc_override.h", + "page_allocator.h", + "page_allocator_interface.h", + "page_heap.h", + "page_heap_allocator.h", + "pagemap.h", + "parameters.h", + "peak_heap_tracker.h", + "sampler.h", + "span.h", + "stack_trace_table.h", + "stats.h", + "static_vars.h", + "system-alloc.h", + "tcmalloc.h", + "thread_cache.h", + "tracking.h", + "transfer_cache.h", +] + +common_deps = [ + ":experiment", + ":malloc_extension", + ":noruntime_size_classes", + ":percpu_tcmalloc", + ":size_class_info", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/debugging:debugging_internal", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/hash:hash", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + "//tcmalloc/internal:atomic_stats_counter", + "//tcmalloc/internal:bits", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:mincore", + "//tcmalloc/internal:parameter_accessors", + "//tcmalloc/internal:percpu", + "//tcmalloc/internal:range_tracker", + "//tcmalloc/internal:util", +] + +cc_library( + name = "common", + srcs = common_srcs, + hdrs = common_hdrs, + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +cc_library( + name = "common_deprecated_perthread", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = common_deps, + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use hugepage-aware +# allocator. 
+cc_library( + name = "want_hpaa", + srcs = ["want_hpaa.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use hugepage-aware +# allocator with hpaa_subrelease=true. +cc_library( + name = "want_hpaa_subrelease", + srcs = ["want_hpaa_subrelease.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to not use hugepage-aware +# allocator. +cc_library( + name = "want_no_hpaa", + srcs = ["want_no_hpaa.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc/testing:__pkg__", + ], + deps = [ + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +cc_library( + name = "runtime_size_classes", + srcs = ["runtime_size_classes.cc"], + hdrs = ["runtime_size_classes.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//visibility:private", + ], + deps = [ + ":size_class_info", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "noruntime_size_classes", + srcs = ["noruntime_size_classes.cc"], + hdrs = ["runtime_size_classes.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":size_class_info", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "tcmalloc_large_pages", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = overlay_deps + tcmalloc_deps + [ + ":common_large_pages", + ], + alwayslink = 1, +) + +cc_library( + name = "common_large_pages", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = common_deps, + alwayslink = 1, +) + +# This is another large page configuration (256k) +cc_library( + name = "tcmalloc_256k_pages", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = overlay_deps + tcmalloc_deps + [ + ":common_256k_pages", + ], + alwayslink = 1, +) + +cc_library( + name = "common_256k_pages", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = common_deps, + alwayslink = 1, +) + +cc_library( + name = "tcmalloc_small_but_slow", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = overlay_deps + tcmalloc_deps + [ + ":common_small_but_slow", + ], + alwayslink = 1, +) + +cc_library( + name = "common_small_but_slow", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = 
common_deps, + alwayslink = 1, +) + +# Export some header files to tcmalloc/testing/... +package_group( + name = "tcmalloc_tests", + packages = [ + "//tcmalloc/testing/...", + ], +) + +cc_library( + name = "headers_for_tests", + srcs = [ + "arena.h", + "central_freelist.h", + "guarded_page_allocator.h", + "huge_address_map.h", + "huge_allocator.h", + "huge_cache.h", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_pages.h", + "huge_region.h", + "page_allocator.h", + "page_allocator_interface.h", + "page_heap.h", + "page_heap_allocator.h", + "pagemap.h", + "parameters.h", + "peak_heap_tracker.h", + "stack_trace_table.h", + "transfer_cache.h", + ], + hdrs = [ + "common.h", + "sampler.h", + "size_class_info.h", + "span.h", + "static_vars.h", + "stats.h", + "system-alloc.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, +) + +cc_library( + name = "page_allocator_test_util", + testonly = 1, + srcs = [ + "page_allocator_test_util.h", + ], + hdrs = ["page_allocator_test_util.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + ":malloc_extension", + ], +) + +cc_test( + name = "page_heap_test", + srcs = ["page_heap_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_cache_test", + srcs = ["huge_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_allocator_test", + srcs = ["huge_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_page_filler_test", + srcs = ["huge_page_filler_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_region_test", + srcs = ["huge_region_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "guarded_page_allocator_test", + srcs = ["guarded_page_allocator_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + 
":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "pagemap_unittest", + srcs = ["pagemap_unittest.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "realloc_unittest", + srcs = ["realloc_unittest.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "stack_trace_table_test", + srcs = ["stack_trace_table_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "system-alloc_unittest", + srcs = ["system-alloc_unittest.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_googletest//:gtest_main", + ], +) + +# This test has been named "large" since before tests were s/m/l. +# The "large" refers to large allocation sizes. +cc_test( + name = "tcmalloc_large_unittest", + size = "small", + timeout = "moderate", + srcs = ["tcmalloc_large_unittest.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_googletest//:gtest_main", + ], +) + +# There are more unittests in the tools subdirectory! (Mostly, those +# tests that depend on more than just //base and //tcmalloc). 
+ +cc_test( + name = "malloc_extension_system_malloc_test", + srcs = ["malloc_extension_system_malloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc/internal:system_malloc", + deps = [ + ":malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "malloc_extension_test", + srcs = ["malloc_extension_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "page_allocator_test", + srcs = ["page_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":common", + ":malloc_extension", + ":page_allocator_test_util", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "profile_test", + size = "medium", + srcs = ["profile_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + shard_count = 2, + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:linked_list", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "size_classes_test", + srcs = ["size_classes_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "size_classes_test_large_pages", + srcs = ["size_classes_test.cc"], + copts = ["-DTCMALLOC_LARGE_PAGES"] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_large_pages", + deps = [ + ":common_large_pages", + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "size_classes_test_256k_pages", + srcs = ["size_classes_test.cc"], + copts = ["-DTCMALLOC_256K_PAGES"] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_256k_pages", + deps = [ + ":common_256k_pages", + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "size_classes_test_small_but_slow", + srcs = ["size_classes_test.cc"], + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_small_but_slow", + deps = [ + ":common_small_but_slow", + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "size_classes_test_with_runtime_size_classes", + srcs = ["size_classes_with_runtime_size_classes_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":common", + ":runtime_size_classes", + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + 
name = "heap_profiling_test", + srcs = ["heap_profiling_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "runtime_size_classes_test", + srcs = ["runtime_size_classes_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":runtime_size_classes", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "span_test", + srcs = ["span_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":headers_for_tests", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "span_test_small_but_slow", + srcs = ["span_test.cc"], + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_small_but_slow", + deps = [ + ":headers_for_tests", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "span_test_large_pages", + srcs = ["span_test.cc"], + copts = ["-DTCMALLOC_LARGE_PAGES"] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_large_pages", + deps = [ + ":headers_for_tests", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "span_test_256k_pages", + srcs = ["span_test.cc"], + copts = ["-DTCMALLOC_256K_PAGES"] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_256k_pages", + deps = [ + ":headers_for_tests", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "stats_test", + srcs = ["stats_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":headers_for_tests", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_address_map_test", + srcs = ["huge_address_map_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "malloc_extension", + srcs = ["malloc_extension.cc"], + hdrs = [ + "internal_malloc_extension.h", + "malloc_extension.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//visibility:public", + ], + deps = [ + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/base:malloc_internal", + "@com_google_absl//absl/functional:function_ref", + 
"@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + +cc_test( + name = "experiment_config_test", + srcs = ["experiment_config_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":experiment", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tcmalloc/arena.cc b/tcmalloc/arena.cc new file mode 100644 index 000000000..975f842c0 --- /dev/null +++ b/tcmalloc/arena.cc @@ -0,0 +1,48 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/arena.h" + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/system-alloc.h" + +namespace tcmalloc { + +void* Arena::Alloc(size_t bytes) { + char* result; + bytes = ((bytes + kAlignment - 1) / kAlignment) * kAlignment; + if (free_avail_ < bytes) { + size_t ask = bytes > kAllocIncrement ? bytes : kAllocIncrement; + size_t actual_size; + free_area_ = reinterpret_cast( + SystemAlloc(ask, &actual_size, kPageSize, /*tagged=*/false)); + if (ABSL_PREDICT_FALSE(free_area_ == nullptr)) { + Log(kCrash, __FILE__, __LINE__, + "FATAL ERROR: Out of memory trying to allocate internal tcmalloc " + "data (bytes, object-size)", + kAllocIncrement, bytes); + } + SystemBack(free_area_, actual_size); + free_avail_ = actual_size; + } + + ASSERT(reinterpret_cast(free_area_) % kAlignment == 0); + result = free_area_; + free_area_ += bytes; + free_avail_ -= bytes; + bytes_allocated_ += bytes; + return reinterpret_cast(result); +} + +} // namespace tcmalloc diff --git a/tcmalloc/arena.h b/tcmalloc/arena.h new file mode 100644 index 000000000..59a727572 --- /dev/null +++ b/tcmalloc/arena.h @@ -0,0 +1,69 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_ARENA_H_ +#define TCMALLOC_ARENA_H_ + +#include +#include + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" + +namespace tcmalloc { + +// Arena allocation; designed for use by tcmalloc internal data structures like +// spans, profiles, etc. Always expands. +class Arena { + public: + Arena() { + } + + // We use an explicit Init function because these variables are statically + // allocated and their constructors might not have run by the time some other + // static variable tries to allocate memory. 
+ void Init() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + free_area_ = nullptr; + free_avail_ = 0; + bytes_allocated_ = 0; + } + + // Return a properly aligned byte array of length "bytes". Crashes if + // allocation fails. Requires pageheap_lock is held. + void* Alloc(size_t bytes) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Returns the total number of bytes allocated from this arena. Requires + // pageheap_lock is held. + uint64_t bytes_allocated() const EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return bytes_allocated_; + } + + private: + // How much to allocate from system at a time + static const int kAllocIncrement = 128 << 10; + + // Free area from which to carve new objects + char* free_area_ GUARDED_BY(pageheap_lock); + size_t free_avail_ GUARDED_BY(pageheap_lock); + + // Total number of bytes allocated from this arena + uint64_t bytes_allocated_ GUARDED_BY(pageheap_lock); + + Arena(const Arena&) = delete; + Arena& operator=(const Arena&) = delete; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_ARENA_H_ diff --git a/tcmalloc/central_freelist.cc b/tcmalloc/central_freelist.cc new file mode 100644 index 000000000..a67759260 --- /dev/null +++ b/tcmalloc/central_freelist.cc @@ -0,0 +1,156 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/central_freelist.h" + +#include + +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +// Like a constructor and hence we disable thread safety analysis. +void CentralFreeList::Init(size_t cl) NO_THREAD_SAFETY_ANALYSIS { + size_class_ = cl; + object_size_ = Static::sizemap()->class_to_size(cl); + objects_per_span_ = Static::sizemap()->class_to_pages(cl) * kPageSize / + (cl ? object_size_ : 1); + nonempty_.Init(); + num_spans_.Clear(); + counter_.Clear(); +} + +static Span* MapObjectToSpan(void* object) { + const PageID p = reinterpret_cast(object) >> kPageShift; + Span* span = Static::pagemap()->GetExistingDescriptor(p); + return span; +} + +Span* CentralFreeList::ReleaseToSpans(void* object, Span* span) { + if (span->FreelistEmpty()) { + nonempty_.prepend(span); + } + + if (span->FreelistPush(object, object_size_)) { + return nullptr; + } + + counter_.LossyAdd(-objects_per_span_); + num_spans_.LossyAdd(-1); + span->RemoveFromList(); // from nonempty_ + return span; +} + +void CentralFreeList::InsertRange(void** batch, int N) { + CHECK_CONDITION(N > 0 && N <= kMaxObjectsToMove); + Span* spans[kMaxObjectsToMove]; + // Safe to store free spans into freed up space in span array. + Span** free_spans = spans; + int free_count = 0; + + // Prefetch Span objects to reduce cache misses. 
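  // The spans gathered here are dereferenced again in ReleaseToSpans() while
  // lock_ is held, so issuing the prefetches before taking the lock helps keep
  // those later accesses, and hence the critical section, short.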
+ for (int i = 0; i < N; ++i) { + Span* span = MapObjectToSpan(batch[i]); + ASSERT(span != nullptr); +#if defined(__GNUC__) + __builtin_prefetch(span, 0, 3); +#endif + spans[i] = span; + } + + // First, release all individual objects into spans under our mutex + // and collect spans that become completely free. + { + absl::base_internal::SpinLockHolder h(&lock_); + for (int i = 0; i < N; ++i) { + Span* span = ReleaseToSpans(batch[i], spans[i]); + if (span) { + free_spans[free_count] = span; + free_count++; + } + } + counter_.LossyAdd(N); + } + + // Then, release all free spans into page heap under its mutex. + if (free_count) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (int i = 0; i < free_count; ++i) { + ASSERT(!IsTaggedMemory(free_spans[i]->start_address())); + Static::pagemap()->UnregisterSizeClass(free_spans[i]); + Static::page_allocator()->Delete(free_spans[i], /*tagged=*/false); + } + } +} + +int CentralFreeList::RemoveRange(void** batch, int N) { + ASSERT(N > 0); + absl::base_internal::SpinLockHolder h(&lock_); + if (nonempty_.empty()) { + Populate(); + } + + int result = 0; + while (result < N && !nonempty_.empty()) { + Span* span = nonempty_.first(); + int here = span->FreelistPopBatch(batch + result, N - result, object_size_); + ASSERT(here > 0); + if (span->FreelistEmpty()) { + span->RemoveFromList(); // from nonempty_ + } + result += here; + } + counter_.LossyAdd(-result); + return result; +} + +// Fetch memory from the system and add to the central cache freelist. +void CentralFreeList::Populate() NO_THREAD_SAFETY_ANALYSIS { + // Release central list lock while operating on pageheap + lock_.Unlock(); + const size_t npages = Static::sizemap()->class_to_pages(size_class_); + + Span* span = Static::page_allocator()->New(npages, /*tagged=*/false); + if (span == nullptr) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: allocation failed", npages << kPageShift); + lock_.Lock(); + return; + } + ASSERT(span->num_pages() == npages); + + Static::pagemap()->RegisterSizeClass(span, size_class_); + span->BuildFreelist(object_size_, objects_per_span_); + + // Add span to list of non-empty spans + lock_.Lock(); + nonempty_.prepend(span); + num_spans_.LossyAdd(1); + counter_.LossyAdd(objects_per_span_); +} + +size_t CentralFreeList::OverheadBytes() { + if (size_class_ == 0) { // 0 holds the 0-sized allocations + return 0; + } + const size_t pages_per_span = Static::sizemap()->class_to_pages(size_class_); + const size_t overhead_per_span = (pages_per_span * kPageSize) % object_size_; + return static_cast(num_spans_.value()) * overhead_per_span; +} + +} // namespace tcmalloc diff --git a/tcmalloc/central_freelist.h b/tcmalloc/central_freelist.h new file mode 100644 index 000000000..53d9fd013 --- /dev/null +++ b/tcmalloc/central_freelist.h @@ -0,0 +1,98 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
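//
// Sketch of the batch-oriented interface declared below (illustrative only;
// `central_freelist`, `cl`, and `want` stand for caller-provided values):
//
//   void* batch[kMaxObjectsToMove];
//   int n = central_freelist.RemoveRange(batch, want);  // fetch up to `want`
//   // ... hand the objects out ...
//   central_freelist.InsertRange(batch, n);  // give them back (requires n > 0)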
+ +#ifndef TCMALLOC_CENTRAL_FREELIST_H_ +#define TCMALLOC_CENTRAL_FREELIST_H_ + +#include + +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/span.h" + +namespace tcmalloc { + +// Data kept per size-class in central cache. +class CentralFreeList { + public: + // A CentralFreeList may be used before its constructor runs. + // So we prevent lock_'s constructor from doing anything to the lock_ state. + CentralFreeList() + : lock_(absl::base_internal::kLinkerInitialized), + counter_(absl::base_internal::kLinkerInitialized), + num_spans_(absl::base_internal::kLinkerInitialized) {} + + void Init(size_t cl) LOCKS_EXCLUDED(lock_); + + // These methods all do internal locking. + + // Insert batch[0..N-1] into the central freelist. + // REQUIRES: N > 0 && N <= kMaxObjectsToMove. + void InsertRange(void **batch, int N) LOCKS_EXCLUDED(lock_); + + // Fill a prefix of batch[0..N-1] with up to N elements removed from central + // freelist. Return the number of elements removed. + int RemoveRange(void **batch, int N) LOCKS_EXCLUDED(lock_); + + // Returns the number of free objects in cache. + size_t length() { return static_cast(counter_.value()); } + + // Returns the memory overhead (internal fragmentation) attributable + // to the freelist. This is memory lost when the size of elements + // in a freelist doesn't exactly divide the page-size (an 8192-byte + // page full of 5-byte objects would have 2 bytes memory overhead). + size_t OverheadBytes(); + + // My size class. + size_t size_class() const { + return size_class_; + } + + private: + // Release an object to spans. + // Returns object's span if it become completely free. + Span* ReleaseToSpans(void* object, Span* span) + EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // Populate cache by fetching from the page heap. + // May temporarily release lock_. + void Populate() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // This lock protects all the mutable data members. + absl::base_internal::SpinLock lock_; + + size_t size_class_; // My size class (immutable after Init()) + size_t object_size_; + size_t objects_per_span_; + + // Following are kept as a StatsCounter so that they can read without + // acquiring a lock. Updates to these variables are guarded by lock_ so writes + // are performed using LossyAdd for speed, the lock still guarantees accuracy. + + // Num free objects in cache entry + tcmalloc_internal::StatsCounter counter_; + // Num spans in empty_ plus nonempty_ + tcmalloc_internal::StatsCounter num_spans_; + + SpanList nonempty_ GUARDED_BY(lock_); // Dummy header for non-empty spans + + CentralFreeList(const CentralFreeList&) = delete; + CentralFreeList& operator=(const CentralFreeList&) = delete; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_CENTRAL_FREELIST_H_ diff --git a/tcmalloc/common.cc b/tcmalloc/common.cc new file mode 100644 index 000000000..b42de5843 --- /dev/null +++ b/tcmalloc/common.cc @@ -0,0 +1,162 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +#include "tcmalloc/experiment.h" +#include "tcmalloc/runtime_size_classes.h" +#include "tcmalloc/sampler.h" + +namespace tcmalloc { + +// Load sizes classes from environment variable if present +// and valid, then returns True. If not found or valid, returns +// False. +bool SizeMap::MaybeRunTimeSizeClasses() { + SizeClassInfo parsed[kNumClasses]; + int num_classes = MaybeSizeClassesFromEnv(kMaxSize, kNumClasses, parsed); + if (!ValidSizeClasses(num_classes, parsed)) { + return false; + } + + if (num_classes != kNumClasses) { + // TODO(b/122839049) - Add tests for num_classes < kNumClasses before + // allowing that case. + Log(kLog, __FILE__, __LINE__, "Can't change the number of size classes", + num_classes, kNumClasses); + return false; + } + + SetSizeClasses(num_classes, parsed); + Log(kLog, __FILE__, __LINE__, "Loaded valid Runtime Size classes"); + return true; +} + +void SizeMap::SetSizeClasses(int num_classes, const SizeClassInfo* parsed) { + class_to_size_[0] = 0; + class_to_pages_[0] = 0; + num_objects_to_move_[0] = 0; + + for (int c = 1; c < num_classes; c++) { + class_to_size_[c] = parsed[c].size; + class_to_pages_[c] = parsed[c].pages; + num_objects_to_move_[c] = parsed[c].num_to_move; + } + + // Fill any unspecified size classes with the largest size + // from the static definitions. + for (int x = num_classes; x < kNumClasses; x++) { + class_to_size_[x] = kSizeClasses[kNumClasses - 1].size; + class_to_pages_[x] = kSizeClasses[kNumClasses - 1].pages; + auto num_to_move = kSizeClasses[kNumClasses - 1].num_to_move; + if (IsExperimentActive(Experiment::TCMALLOC_LARGE_NUM_TO_MOVE)) { + num_to_move = std::min(kMaxObjectsToMove, 4 * num_to_move); + } + num_objects_to_move_[x] = num_to_move; + } +} + +// Return true if all size classes meet the requirements for alignment +// ordering and min and max values. +bool SizeMap::ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) { + if (num_classes <= 0) { + return false; + } + for (int c = 1; c < num_classes; c++) { + size_t class_size = parsed[c].size; + size_t pages = parsed[c].pages; + size_t num_objects_to_move = parsed[c].num_to_move; + // Each size class must be larger than the previous size class. + if (class_size <= parsed[c - 1].size) { + Log(kLog, __FILE__, __LINE__, "Non-increasing size class", c, + parsed[c - 1].size, class_size); + return false; + } + if (class_size > kMaxSize) { + Log(kLog, __FILE__, __LINE__, "size class too big", c, class_size, + kMaxSize); + return false; + } + // Check required alignment + size_t alignment = 128; + if (class_size <= kMultiPageSize) { + alignment = kAlignment; + } else if (class_size <= SizeMap::kMaxSmallSize) { + alignment = kMultiPageAlignment; + } + if ((class_size & (alignment - 1)) != 0) { + Log(kLog, __FILE__, __LINE__, "Not aligned properly", c, class_size, + alignment); + return false; + } + if (class_size <= kMultiPageSize && pages != 1) { + Log(kLog, __FILE__, __LINE__, "Multiple pages not allowed", class_size, + pages, kMultiPageSize); + return false; + } + if (pages >= 256) { + Log(kLog, __FILE__, __LINE__, "pages limited to 255", pages); + return false; + } + if (num_objects_to_move > kMaxObjectsToMove) { + Log(kLog, __FILE__, __LINE__, "num objects to move too large", + num_objects_to_move, kMaxObjectsToMove); + return false; + } + } + // Last size class must be able to hold kMaxSize. 
+ if (parsed[num_classes - 1].size < kMaxSize) { + Log(kLog, __FILE__, __LINE__, "last class doesn't cover kMaxSize", + num_classes - 1, parsed[num_classes - 1].size, kMaxSize); + return false; + } + return true; +} + +// Initialize the mapping arrays +void SizeMap::Init() { + // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] + if (ClassIndex(0) != 0) { + Log(kCrash, __FILE__, __LINE__, + "Invalid class index for size 0", ClassIndex(0)); + } + if (ClassIndex(kMaxSize) >= sizeof(class_array_)) { + Log(kCrash, __FILE__, __LINE__, + "Invalid class index for kMaxSize", ClassIndex(kMaxSize)); + } + + static_assert(kAlignment <= 16, "kAlignment is too large"); + + if (IsExperimentActive(Experiment::TCMALLOC_SANS_56_SIZECLASS)) { + SetSizeClasses(kNumClasses, kExperimentalSizeClasses); + } else { + SetSizeClasses(kNumClasses, kSizeClasses); + } + MaybeRunTimeSizeClasses(); + + int next_size = 0; + for (int c = 1; c < kNumClasses; c++) { + const int max_size_in_class = class_to_size_[c]; + + for (int s = next_size; s <= max_size_in_class; s += kAlignment) { + class_array_[ClassIndex(s)] = c; + } + next_size = max_size_in_class + kAlignment; + if (next_size > kMaxSize) { + break; + } + } +} + +} // namespace tcmalloc diff --git a/tcmalloc/common.h b/tcmalloc/common.h new file mode 100644 index 000000000..81344b305 --- /dev/null +++ b/tcmalloc/common.h @@ -0,0 +1,455 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Common definitions for tcmalloc code. + +#ifndef TCMALLOC_COMMON_H_ +#define TCMALLOC_COMMON_H_ + +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/bits.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/size_class_info.h" + +// Type that can hold a page number +typedef uintptr_t PageID; + +// Type that can hold the length of a run of pages +typedef uintptr_t Length; + +//------------------------------------------------------------------- +// Configuration +//------------------------------------------------------------------- + +// There are four different models for tcmalloc which are created by defining a +// set of constant variables differently: +// +// DEFAULT: +// The default configuration strives for good performance while trying to +// minimize fragmentation. It uses a smaller page size to reduce +// fragmentation, but allocates per-thread and per-cpu capacities similar to +// TCMALLOC_LARGE_PAGES / TCMALLOC_256K_PAGES. +// +// TCMALLOC_LARGE_PAGES: +// Larger page sizes increase the bookkeeping granularity used by TCMalloc for +// its allocations. This can reduce PageMap size and traffic to the +// innermost cache (the page heap), but can increase memory footprints. As +// TCMalloc will not reuse a page for a different allocation size until the +// entire page is deallocated, this can be a source of increased memory +// fragmentation. 
+// +// Historically, larger page sizes improved lookup performance for the +// pointer-to-size lookup in the PageMap that was part of the critical path. +// With most deallocations leveraging C++14's sized delete feature +// (https://isocpp.org/files/papers/n3778.html), this optimization is less +// significant. +// +// TCMALLOC_256K_PAGES +// This configuration uses an even larger page size (256KB) as the unit of +// accounting granularity. +// +// TCMALLOC_SMALL_BUT_SLOW: +// Used for situations where minimizing the memory footprint is the most +// desirable attribute, even at the cost of performance. +// +// The constants that vary between models are: +// +// kPageShift - Shift amount used to compute the page size. +// kNumClasses - Number of size classes serviced by bucket allocators +// kMaxSize - Maximum size serviced by bucket allocators (thread/cpu/central) +// kMinThreadCacheSize - The minimum size in bytes of each ThreadCache. +// kMaxThreadCacheSize - The maximum size in bytes of each ThreadCache. +// kDefaultOverallThreadCacheSize - The maximum combined size in bytes of all +// ThreadCaches for an executable. +// kStealAmount - The number of bytes one ThreadCache will steal from another +// when the first ThreadCache is forced to Scavenge(), delaying the next +// call to Scavenge for this thread. + +// Older configurations had their own customized macros. Convert them into +// a page-shift parameter that is checked below. + +#ifndef TCMALLOC_PAGE_SHIFT +#ifdef TCMALLOC_SMALL_BUT_SLOW +#define TCMALLOC_PAGE_SHIFT 12 +#define TCMALLOC_USE_PAGEMAP3 +#elif defined(TCMALLOC_256K_PAGES) +#define TCMALLOC_PAGE_SHIFT 18 +#elif defined(TCMALLOC_LARGE_PAGES) +#define TCMALLOC_PAGE_SHIFT 15 +#else +#define TCMALLOC_PAGE_SHIFT 13 +#endif +#else +#error "TCMALLOC_PAGE_SHIFT is an internal macro!" 
+#endif + +#if TCMALLOC_PAGE_SHIFT == 12 +static const size_t kPageShift = 12; +static const size_t kNumClasses = 46; +static const size_t kMaxSize = 8 << 10; +static const size_t kMinThreadCacheSize = 4 * 1024; +static const size_t kMaxThreadCacheSize = 64 * 1024; +static const size_t kMaxCpuCacheSize = 20 * 1024; +static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; +static const size_t kStealAmount = kMinThreadCacheSize; +static const size_t kDefaultProfileSamplingRate = 1 << 19; +static const size_t kMinPages = 2; +#elif TCMALLOC_PAGE_SHIFT == 15 +static const size_t kPageShift = 15; +static const size_t kNumClasses = 78; +static const size_t kMaxSize = 256 * 1024; +static const size_t kMinThreadCacheSize = kMaxSize * 2; +static const size_t kMaxThreadCacheSize = 4 << 20; +static const size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; +static const size_t kStealAmount = 1 << 16; +static const size_t kDefaultProfileSamplingRate = 1 << 21; +static const size_t kMinPages = 8; +#elif TCMALLOC_PAGE_SHIFT == 18 +static const size_t kPageShift = 18; +static const size_t kNumClasses = 89; +static const size_t kMaxSize = 256 * 1024; +static const size_t kMinThreadCacheSize = kMaxSize * 2; +static const size_t kMaxThreadCacheSize = 4 << 20; +static const size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; +static const size_t kStealAmount = 1 << 16; +static const size_t kDefaultProfileSamplingRate = 1 << 21; +static const size_t kMinPages = 8; +#elif TCMALLOC_PAGE_SHIFT == 13 +static const size_t kPageShift = 13; +static const size_t kNumClasses = 86; +static const size_t kMaxSize = 256 * 1024; +static const size_t kMinThreadCacheSize = kMaxSize * 2; +static const size_t kMaxThreadCacheSize = 4 << 20; +static const size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; +static const size_t kStealAmount = 1 << 16; +static const size_t kDefaultProfileSamplingRate = 1 << 21; +static const size_t kMinPages = 8; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif + +// Minimum/maximum number of batches in TransferCache per size class. +// Actual numbers depends on a number of factors, see TransferCache::Init +// for details. +static const size_t kMinObjectsToMove = 2; +static const size_t kMaxObjectsToMove = 128; + +static const size_t kPageSize = 1 << kPageShift; +// Verify that the page size used is at least 8x smaller than the maximum +// element size in the thread cache. This guarantees at most 12.5% internal +// fragmentation (1/8). When page size is 256k (kPageShift == 18), the benefit +// of increasing kMaxSize to be multiple of kPageSize is unclear. Object size +// profile data indicates that the number of simultaneously live objects (of +// size >= 256k) tends to be very small. Keeping those objects as 'large' +// objects won't cause too much memory waste, while heap memory reuse is can be +// improved. Increasing kMaxSize to be too large has another bad side effect -- +// the thread cache pressure is increased, which will in turn increase traffic +// between central cache and thread cache, leading to performance degradation. 
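// For the default configuration, for example, kPageShift == 13 gives an 8 KiB
// page and kMaxSize == 256 KiB, so kMaxSize / kPageSize == 32, comfortably
// above kMinPages == 8; the 256 KiB page configuration instead passes the
// assertion below via its kPageShift >= 18 escape hatch.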
+static_assert((kMaxSize / kPageSize) >= kMinPages || kPageShift >= 18, + "Ratio of kMaxSize / kPageSize is too small"); + +static const size_t kAlignment = 8; +// log2 (kAlignment) +static const size_t kAlignmentShift = + tcmalloc::tcmalloc_internal::Bits::Log2Ceiling(kAlignment); +// For all span-lengths < kMaxPages we keep an exact-size list. +static const size_t kMaxPages = 1 << (20 - kPageShift); + +// The number of times that a deallocation can cause a freelist to +// go over its max_length() before shrinking max_length(). +static const int kMaxOverages = 3; + +// Maximum length we allow a per-thread free-list to have before we +// move objects from it into the corresponding central free-list. We +// want this big to avoid locking the central free-list too often. It +// should not hurt to make this list somewhat big because the +// scavenging code will shrink it down when its contents are not in use. +static const int kMaxDynamicFreeListLength = 8192; + +static const Length kMaxValidPages = (~static_cast(0)) >> kPageShift; + +#if defined __x86_64__ +// All current and planned x86_64 processors only look at the lower 48 bits +// in virtual to physical address translation. The top 16 are thus unused. +// TODO(b/134686025): Under what operating systems can we increase it safely to +// 17? This lets us use smaller page maps. On first allocation, a 36-bit page +// map uses only 96 KB instead of the 4.5 MB used by a 52-bit page map. +static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); +#elif defined __powerpc64__ && defined __linux__ +// Linux(4.12 and above) on powerpc64 supports 128TB user virtual address space +// by default, and up to 512TB if user space opts in by specifing hint in mmap. +// See comments in arch/powerpc/include/asm/processor.h +// and arch/powerpc/mm/mmap.c. +static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 49); +#elif defined __aarch64__ && defined __linux__ +// According to Documentation/arm64/memory.txt of kernel 3.16, +// AARCH64 kernel supports 48-bit virtual addresses for both user and kernel. +static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); +#else +static const int kAddressBits = 8 * sizeof(void*); +#endif + +namespace tcmalloc { +#if defined(__x86_64__) +// x86 has 2 MiB huge pages +static const size_t kHugePageShift = 21; +#elif defined(__PPC64__) +static const size_t kHugePageShift = 24; +#elif defined __aarch64__ && defined __linux__ +static const size_t kHugePageShift = 21; +#else +// ...whatever, guess something big-ish +static const size_t kHugePageShift = 21; +#endif + +static const size_t kHugePageSize = static_cast(1) << kHugePageShift; +static const size_t kPagesPerHugePage = static_cast(1) + << (kHugePageShift - kPageShift); +static constexpr uintptr_t kTagMask = uintptr_t{1} + << std::min(kAddressBits - 4, 42); + +#if !defined(TCMALLOC_SMALL_BUT_SLOW) && __WORDSIZE != 32 +// Always allocate at least a huge page +static const size_t kMinSystemAlloc = kHugePageSize; +static const size_t kMinMmapAlloc = 1 << 30; // mmap() in 1GiB ranges. +#else +// Allocate in units of 2MiB. This is the size of a huge page for x86, but +// not for Power. +static const size_t kMinSystemAlloc = 2 << 20; +// mmap() in units of 32MiB. 
This is a multiple of huge page size for +// both x86 (2MiB) and Power (16MiB) +static const size_t kMinMmapAlloc = 32 << 20; +#endif + +static_assert(kMinMmapAlloc % kMinSystemAlloc == 0, + "Minimum mmap allocation size is not a multiple of" + " minimum system allocation size"); + +// Convert byte size into pages. This won't overflow, but may return +// an unreasonably large value if bytes is huge enough. +inline Length pages(size_t bytes) { + return (bytes >> kPageShift) + + ((bytes & (kPageSize - 1)) > 0 ? 1 : 0); +} + +// Returns true if ptr is tagged. +inline bool IsTaggedMemory(const void* ptr) { + return (reinterpret_cast(ptr) & kTagMask) == 0; +} + +// Size-class information + mapping +class SizeMap { + public: + // All size classes <= 512 in all configs always have 1 page spans. + static const size_t kMultiPageSize = 512; + // Min alignment for all size classes > kMultiPageSize in all configs. + static const size_t kMultiPageAlignment = 64; + // log2 (kMultiPageAlignment) + static const size_t kMultiPageAlignmentShift = + tcmalloc::tcmalloc_internal::Bits::Log2Ceiling(kMultiPageAlignment); + + private: + //------------------------------------------------------------------- + // Mapping from size to size_class and vice versa + //------------------------------------------------------------------- + + // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an + // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. + // So for these larger sizes we have an array indexed by ceil(size/128). + // + // We flatten both logical arrays into one physical array and use + // arithmetic to compute an appropriate index. The constants used by + // ClassIndex() were selected to make the flattening work. + // + // Examples: + // Size Expression Index + // ------------------------------------------------------- + // 0 (0 + 7) / 8 0 + // 1 (1 + 7) / 8 1 + // ... + // 1024 (1024 + 7) / 8 128 + // 1025 (1025 + 127 + (120<<7)) / 128 129 + // ... + // 32768 (32768 + 127 + (120<<7)) / 128 376 + static const int kMaxSmallSize = 1024; + static const size_t kClassArraySize = + ((kMaxSize + 127 + (120 << 7)) >> 7) + 1; + + // Batch size is the number of objects to move at once. + typedef unsigned char BatchSize; + + // class_array_ is accessed on every malloc, so is very hot. We make it the + // first member so that it inherits the overall alignment of a SizeMap + // instance. In particular, if we create a SizeMap instance that's cache-line + // aligned, this member is also aligned to the width of a cache line. + unsigned char class_array_[kClassArraySize]; + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + BatchSize num_objects_to_move_[kNumClasses]; + + // If size is no more than kMaxSize, compute index of the + // class_array[] entry for it, putting the class index in output + // parameter idx and returning true. Otherwise return false. 
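  //
  // Working through one row of the table above: for s == 1025 the second
  // branch below applies, and (1025 + 127 + (120 << 7)) >> 7 == 16512 >> 7
  // == 129.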
+ static inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s, + uint32_t* idx) { + if (ABSL_PREDICT_TRUE(s <= kMaxSmallSize)) { + *idx = (static_cast(s) + 7) >> 3; + return true; + } else if (s <= kMaxSize) { + *idx = (static_cast(s) + 127 + (120 << 7)) >> 7; + return true; + } + return false; + } + + static inline size_t ClassIndex(size_t s) { + uint32_t ret; + CHECK_CONDITION(ClassIndexMaybe(s, &ret)); + return ret; + } + + // Mapping from size class to number of pages to allocate at a time + unsigned char class_to_pages_[kNumClasses]; + + // Mapping from size class to max size storable in that class + uint32_t class_to_size_[kNumClasses]; + + // If environment variable defined, use it to override sizes classes. + // Returns true if all classes defined correctly. + bool MaybeRunTimeSizeClasses(); + + protected: + // Set the give size classes to be used by TCMalloc. + void SetSizeClasses(int num_classes, const SizeClassInfo* parsed); + + // Check that the size classes meet all requirements. + bool ValidSizeClasses(int num_classes, const SizeClassInfo* parsed); + + // Definition of size class that is set in size_classes.cc + static const SizeClassInfo kSizeClasses[kNumClasses]; + + // Definition of size class that is set in size_classes.cc + static const SizeClassInfo kExperimentalSizeClasses[kNumClasses]; + + public: + // Constructor should do nothing since we rely on explicit Init() + // call, which may or may not be called before the constructor runs. + SizeMap() { } + + // Initialize the mapping arrays + void Init(); + + // Returns the non-zero matching size class for the provided `size`. + // Returns true on success, returns false if `size` exceeds the maximum size + // class value `kMaxSize'. + // Important: this function may return true with *cl == 0 if this + // SizeMap instance has not (yet) been initialized. + inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, + uint32_t* cl) { + uint32_t idx; + if (ABSL_PREDICT_TRUE(ClassIndexMaybe(size, &idx))) { + *cl = class_array_[idx]; + return true; + } + return false; + } + + // Returns the size class for size `size` aligned at `align` + // Returns true on success. Returns false if either: + // - the size exceeds the maximum size class size. + // - the align size is greater or equal to the default page size + // - no matching properly aligned size class is available + // + // Requires that align is a non-zero power of 2. + // + // Specifying align = 1 will result in this method using the default + // alignment of the size table. Calling this method with a constexpr + // value of align = 1 will be optimized by the compiler, and result in + // the inlined code to be identical to calling `GetSizeClass(size, cl)` + inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, + size_t align, + uint32_t* cl) { + ASSERT(align > 0); + ASSERT((align & (align - 1)) == 0); + + if (ABSL_PREDICT_FALSE(align >= kPageSize)) { + return false; + } + if (ABSL_PREDICT_FALSE(!GetSizeClass(size, cl))) { + return false; + } + + // Predict that size aligned allocs most often directly map to a proper + // size class, i.e., multiples of 32, 64, etc, matching our class sizes. + const size_t mask = (align - 1); + do { + if (ABSL_PREDICT_TRUE((class_to_size(*cl) & mask) == 0)) { + return true; + } + } while (++*cl < kNumClasses); + + return false; + } + + // Returns size class for given size, or 0 if this instance has not been + // initialized yet. REQUIRES: size <= kMaxSize. 
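  //
  // Illustrative use (assumes SizeMap::Init() has already run):
  //   size_t cl = Static::sizemap()->SizeClass(40);
  //   size_t allocated = Static::sizemap()->class_to_size(cl);  // >= 40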
+ inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE SizeClass(size_t size) { + ASSERT(size <= kMaxSize); + uint32_t ret = 0; + GetSizeClass(size, &ret); + return ret; + } + + // Get the byte-size for a specified class. REQUIRES: cl <= kNumClasses. + inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE class_to_size(size_t cl) { + ASSERT(cl < kNumClasses); + return class_to_size_[cl]; + } + + // Mapping from size class to number of pages to allocate at a time + inline size_t class_to_pages(size_t cl) { + ASSERT(cl < kNumClasses); + return class_to_pages_[cl]; + } + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + inline SizeMap::BatchSize num_objects_to_move(size_t cl) { + ASSERT(cl < kNumClasses); + return num_objects_to_move_[cl]; + } +}; + +// Linker initialized, so this lock can be accessed at any time. +extern absl::base_internal::SpinLock pageheap_lock; + +} // namespace tcmalloc + +#endif // TCMALLOC_COMMON_H_ diff --git a/tcmalloc/copts.bzl b/tcmalloc/copts.bzl new file mode 100644 index 000000000..b04cc1236 --- /dev/null +++ b/tcmalloc/copts.bzl @@ -0,0 +1,38 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This package provides default compiler warning flags for the OSS release""" + +TCMALLOC_LLVM_FLAGS = [ + "-Wno-implicit-int-float-conversion", + "-Wno-sign-compare", + "-Wno-uninitialized", + "-Wno-unused-function", + "-Wno-unused-variable", +] + +TCMALLOC_GCC_FLAGS = [ + "-Wno-attribute-alias", + "-Wno-sign-compare", + "-Wno-uninitialized", + "-Wno-unused-function", + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425 + "-Wno-unused-result", + "-Wno-unused-variable", +] + +TCMALLOC_DEFAULT_COPTS = select({ + "//tcmalloc:llvm": TCMALLOC_LLVM_FLAGS, + "//conditions:default": TCMALLOC_GCC_FLAGS, +}) diff --git a/tcmalloc/cpu_cache.cc b/tcmalloc/cpu_cache.cc new file mode 100644 index 000000000..1552fcdb3 --- /dev/null +++ b/tcmalloc/cpu_cache.cc @@ -0,0 +1,579 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/cpu_cache.h" + +#include +#include + +#include +#include + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/macros.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/transfer_cache.h" + +namespace tcmalloc { + +using subtle::percpu::GetCurrentCpuUnsafe; + +// MaxCapacity() determines how we distribute memory in the per-cpu cache +// to the various class sizes. +static size_t MaxCapacity(size_t cl) { + // The number of size classes that are commonly used and thus should be + // allocated more slots in the per-cpu cache. + static constexpr size_t kNumSmall = 10; + // The remaining size classes, excluding size class 0. + static constexpr size_t kNumLarge = kNumClasses - 1 - kNumSmall; + // The memory used for each per-CPU slab is the sum of: + // sizeof(std::atomic) * kNumClasses + // sizeof(void*) * (kSmallObjectDepth + 1) * kNumSmall + // sizeof(void*) * (kLargeObjectDepth + 1) * kNumLarge + // + // Class size 0 has MaxCapacity() == 0, which is the reason for using + // kNumClasses - 1 above instead of kNumClasses. + // + // Each Size class region in the slab is preceded by one padding pointer that + // points to itself, because prefetch instructions of invalid pointers are + // slow. That is accounted for by the +1 for object depths. +#if defined(TCMALLOC_SMALL_BUT_SLOW) + // With SMALL_BUT_SLOW we have 4KiB of per-cpu slab and 46 class sizes we + // allocate: + // == 8 * 46 + 8 * ((16 + 1) * 10 + (6 + 1) * 35) = 4038 bytes of 4096 + static const size_t kSmallObjectDepth = 16; + static const size_t kLargeObjectDepth = 6; +#else + // We allocate 256KiB per-cpu for pointers to cached per-cpu memory. + // Each 256KiB is a subtle::percpu::TcmallocSlab::Slabs + // Max(kNumClasses) is 89, so the maximum footprint per CPU is: + // 89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78 + 88) = 254 KiB + static const size_t kSmallObjectDepth = 2048; + static const size_t kLargeObjectDepth = 152; +#endif + static_assert(sizeof(std::atomic) * kNumClasses + + sizeof(void *) * (kSmallObjectDepth + 1) * kNumSmall + + sizeof(void *) * (kLargeObjectDepth + 1) * kNumLarge <= + (1 << CPUCache::kPerCpuShift), + "per-CPU memory exceeded"); + if (cl == 0 || cl >= kNumClasses) return 0; + if (cl <= kNumSmall) { + // Small object sizes are very heavily used and need very deep caches for + // good performance (well over 90% of malloc calls are for cl <= 10.) 
+ return kSmallObjectDepth; + } + + return kLargeObjectDepth; +} + +static void *SlabAlloc(size_t size) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return Static::arena()->Alloc(size); +} + +void CPUCache::Activate() { + ASSERT(Static::IsInited()); + int num_cpus = absl::base_internal::NumCPUs(); + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + + resize_ = reinterpret_cast( + Static::arena()->Alloc(sizeof(ResizeInfo) * num_cpus)); + lazy_slabs_ = Parameters::lazy_per_cpu_caches(); + + auto max_cache_size = Parameters::max_per_cpu_cache_size(); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + for (int cl = 1; cl < kNumClasses; ++cl) { + resize_[cpu].per_class[cl].Init(); + } + resize_[cpu].available.store(max_cache_size, std::memory_order_relaxed); + resize_[cpu].last_steal.store(1, std::memory_order_relaxed); + } + + freelist_.Init(SlabAlloc, MaxCapacity, lazy_slabs_); + Static::ActivateCPUCache(); +} + +// Fetch more items from the central cache, refill our local cache, +// and try to grow it if necessary. +// +// This is complicated by the fact that we can only tweak the cache on +// our current CPU and we might get migrated whenever (in fact, we +// might already have been migrated since failing to get memory...) +// +// So make sure only to make changes to one CPU's cache; at all times, +// it must be safe to find ourselves migrated (at which point we atomically +// return memory to the correct CPU.) +void *CPUCache::Refill(int cpu, size_t cl) { + const size_t batch_length = Static::sizemap()->num_objects_to_move(cl); + + // UpdateCapacity can evict objects from other size classes as it tries to + // increase capacity of this size class. The objects are returned in + // to_return, we insert them into transfer cache at the end of function + // (to increase possibility that we stay on the current CPU as we are + // refilling the list). + size_t returned = 0; + ObjectClass to_return[kNumClasses]; + const size_t target = + UpdateCapacity(cpu, cl, batch_length, false, to_return, &returned); + + // Refill target objects in batch_length batches. + size_t total = 0; + size_t got; + size_t i; + void *result = nullptr; + void *batch[kMaxObjectsToMove]; + do { + const size_t want = std::min(batch_length, target - total); + got = Static::transfer_cache()[cl].RemoveRange(batch, want); + if (got == 0) { + break; + } + total += got; + i = got; + if (result == nullptr) { + i--; + result = batch[i]; + } + if (i) { + i -= freelist_.PushBatch(cl, batch, i); + if (i != 0) { + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache()[cl].InsertRange(absl::Span(batch), i); + } + } + } while (got == batch_length && i == 0 && total < target && + cpu == GetCurrentCpuUnsafe()); + + for (size_t i = 0; i < returned; ++i) { + ObjectClass *ret = &to_return[i]; + Static::transfer_cache()[ret->cl].InsertRange( + absl::Span(&ret->obj, 1), 1); + } + + return result; +} + +size_t CPUCache::UpdateCapacity(int cpu, size_t cl, size_t batch_length, + bool overflow, ObjectClass *to_return, + size_t *returned) { + // Freelist size balancing strategy: + // - We grow a size class only on overflow/underflow. + // - We shrink size classes in Steal as it scans all size classes. + // - If overflows/underflows happen on a size class, we want to grow its + // capacity to at least 2 * batch_length. It enables usage of the + // transfer cache and leaves the list half-full after we insert/remove + // a batch from the transfer cache. 
+ // - We increase capacity beyond 2 * batch_length only when an overflow is + // followed by an underflow. That's the only case when we could benefit + // from larger capacity -- the overflow and the underflow would collapse. + // + // Note: we can't understand when we have a perfectly-sized list, because for + // a perfectly-sized list we don't hit any slow paths which looks the same as + // inactive list. Eventually we will shrink a perfectly-sized list a bit and + // then it will grow back. This won't happen very frequently for the most + // important small sizes, because we will need several ticks before we shrink + // it again. Also we will shrink it by 1, but grow by a batch. So we should + // have lots of time until we need to grow it again. + + const size_t max_capacity = MaxCapacity(cl); + size_t capacity = freelist_.Capacity(cpu, cl); + // We assert that the return value, target, is non-zero, so starting from an + // initial capacity of zero means we may be populating this core for the + // first time. + absl::base_internal::LowLevelCallOnce( + &resize_[cpu].initialized, + [](CPUCache *cache, int cpu) { + if (cache->lazy_slabs_) { + absl::base_internal::SpinLockHolder h(&cache->resize_[cpu].lock); + cache->freelist_.InitCPU(cpu, MaxCapacity); + } + + // While we could unconditionally store, a lazy slab population + // implementation will require evaluating a branch. + cache->resize_[cpu].populated.store(true, std::memory_order_relaxed); + }, + this, cpu); + const bool grow_by_one = capacity < 2 * batch_length; + uint32_t successive = 0; + bool grow_by_batch = + resize_[cpu].per_class[cl].Update(overflow, grow_by_one, &successive); + if ((grow_by_one || grow_by_batch) && capacity != max_capacity) { + size_t increase = 1; + if (grow_by_batch) { + increase = std::min(batch_length, max_capacity - capacity); + } else if (!overflow && capacity < batch_length) { + // On underflow we want to grow to at least batch size, because that's + // what we want to request from transfer cache. + increase = batch_length - capacity; + } + Grow(cpu, cl, increase, to_return, returned); + capacity = freelist_.Capacity(cpu, cl); + } + // Calculate number of objects to return/request from transfer cache. + // Generally we prefer to transfer a single batch, because transfer cache + // handles it efficiently. Except for 2 special cases: + size_t target = batch_length; + // "capacity + 1" because on overflow we already have one object from caller, + // so we can return a whole batch even if capacity is one less. Similarly, + // on underflow we need to return one object to caller, so we can request + // a whole batch even if capacity is one less. + if ((capacity + 1) < batch_length) { + // If we don't have a full batch, return/request just half. We are missing + // transfer cache anyway, and cost of insertion into central freelist is + // ~O(number of objects). + target = std::max(1, (capacity + 1) / 2); + } else if (successive > 0 && capacity >= 3 * batch_length) { + // If the freelist is large and we are hitting series of overflows or + // underflows, return/request several batches at once. On the first overflow + // we return 1 batch, on the second -- 2, on the third -- 4 and so on up to + // half of the batches we have. We do this to save on the cost of hitting + // malloc/free slow path, reduce instruction cache pollution, avoid cache + // misses when accessing transfer/central caches, etc. 
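+    // Illustrative numbers (not from a benchmark): with batch_length == 32,
+    // capacity == 256 and successive == 3, the code below requests
+    // min(1 << 3, (256 / 32 + 1) / 2) == min(8, 4) == 4 batches, i.e. 128
+    // objects.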
+ size_t num_batches = + std::min(1 << std::min(successive, 10), + ((capacity / batch_length) + 1) / 2); + target = num_batches * batch_length; + } + ASSERT(target != 0); + return target; +} + +void CPUCache::Grow(int cpu, size_t cl, size_t desired_increase, + ObjectClass *to_return, size_t *returned) { + const size_t size = Static::sizemap()->class_to_size(cl); + const size_t desired_bytes = desired_increase * size; + size_t acquired_bytes; + + // First, there might be unreserved slack. Take what we can. + size_t before, after; + do { + before = resize_[cpu].available.load(std::memory_order_relaxed); + acquired_bytes = std::min(before, desired_bytes); + after = before - acquired_bytes; + } while (!resize_[cpu].available.compare_exchange_strong( + before, after, std::memory_order_relaxed, std::memory_order_relaxed)); + + if (acquired_bytes < desired_bytes) { + acquired_bytes += + Steal(cpu, cl, desired_bytes - acquired_bytes, to_return, returned); + } + + // We have all the memory we could reserve. Time to actually do the growth. + + // We might have gotten more than we wanted (stealing from larger sizeclasses) + // so don't grow _too_ much. + size_t actual_increase = acquired_bytes / size; + actual_increase = std::min(actual_increase, desired_increase); + // Remember, Grow may not give us all we ask for. + size_t increase = freelist_.Grow(cpu, cl, actual_increase, MaxCapacity(cl)); + size_t increased_bytes = increase * size; + if (increased_bytes < acquired_bytes) { + // return whatever we didn't use to the slack. + size_t unused = acquired_bytes - increased_bytes; + resize_[cpu].available.fetch_add(unused, std::memory_order_relaxed); + } +} + +// There are rather a lot of policy knobs we could tweak here. +size_t CPUCache::Steal(int cpu, size_t dest_cl, size_t bytes, + ObjectClass *to_return, size_t *returned) { + // Steal from other sizeclasses. Try to go in a nice circle. + // Complicated by sizeclasses actually being 1-indexed. + size_t acquired = 0; + size_t start = resize_[cpu].last_steal.load(std::memory_order_relaxed); + ASSERT(start < kNumClasses); + ASSERT(0 < start); + size_t source_cl = start; + for (size_t offset = 1; offset < kNumClasses; ++offset) { + source_cl = start + offset; + if (source_cl >= kNumClasses) { + source_cl -= kNumClasses - 1; + } + ASSERT(0 < source_cl); + ASSERT(source_cl < kNumClasses); + // Decide if we want to steal source_cl. + if (source_cl == dest_cl) { + // First, no sense in picking your own pocket. + continue; + } + const size_t capacity = freelist_.Capacity(cpu, source_cl); + if (capacity == 0) { + // Nothing to steal. + continue; + } + const size_t length = freelist_.Length(cpu, source_cl); + const size_t batch_length = + Static::sizemap()->num_objects_to_move(source_cl); + size_t size = Static::sizemap()->class_to_size(source_cl); + + // Clock-like algorithm to prioritize size classes for shrinking. + // + // Each size class has quiescent ticks counter which is incremented as we + // pass it, the counter is reset to 0 in UpdateCapacity on grow. + // If the counter value is 0, then we've just tried to grow the size class, + // so it makes little sense to shrink it back. The higher counter value + // the longer ago we grew the list and the more probable it is that + // the full capacity is unused. + // + // Then, we calculate "shrinking score", the higher the score the less we + // we want to shrink this size class. 
The score is considerably skewed + // towards larger size classes: smaller classes are usually used more + // actively and we also benefit less from shrinking smaller classes (steal + // less capacity). Then, we also avoid shrinking full freelists as we will + // need to evict an object and then go to the central freelist to return it. + // Then, we also avoid shrinking freelists that are just above batch size, + // because shrinking them will disable transfer cache. + // + // Finally, we shrink if the ticks counter is >= the score. + uint32_t qticks = resize_[cpu].per_class[source_cl].Tick(); + uint32_t score = 0; + // Note: the following numbers are based solely on intuition, common sense + // and benchmarking results. + if (size <= 144) { + score = 2 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= 1024) { + score = 1 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= (64 << 10)) { + score = (length >= capacity); + } + if (score > qticks) { + continue; + } + + if (length >= capacity) { + // The list is full, need to evict an object to shrink it. + if (to_return == nullptr) { + continue; + } + void *obj = freelist_.Pop(source_cl, NoopUnderflow); + if (obj) { + ObjectClass *ret = &to_return[*returned]; + ++(*returned); + ret->cl = source_cl; + ret->obj = obj; + } + } + + // Finally, try to shrink (can fail if we were migrated). + // We always shrink by 1 object. The idea is that inactive lists will be + // shrunk to zero eventually anyway (or they just would not grow in the + // first place), but for active lists it does not make sense to aggressively + // shuffle capacity all the time. + if (freelist_.Shrink(cpu, source_cl, 1) == 1) { + acquired += size; + } + + if (cpu != GetCurrentCpuUnsafe() || acquired >= bytes) { + // can't steal any more or don't need to + break; + } + } + // update the hint + resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed); + return acquired; +} + +int CPUCache::Overflow(void *ptr, size_t cl, int cpu) { + const size_t batch_length = Static::sizemap()->num_objects_to_move(cl); + const size_t target = + UpdateCapacity(cpu, cl, batch_length, true, nullptr, nullptr); + // Return target objects in batch_length batches. 
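+  // Illustrative walk-through (numbers are made up): with batch_length == 32
+  // and target == 96, the loop below returns the incoming object plus objects
+  // popped from this CPU's slab in three 32-object InsertRange calls,
+  // stopping early if the slab runs out of objects or we migrate to another
+  // CPU.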
+ size_t total = 0; + size_t count = 1; + void *batch[kMaxObjectsToMove]; + batch[0] = ptr; + do { + size_t want = std::min(batch_length, target - total); + if (count < want) { + count += freelist_.PopBatch(cl, batch + count, want - count); + } + if (!count) break; + + total += count; + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache()[cl].InsertRange(absl::Span(batch), count); + if (count != batch_length) break; + count = 0; + } while (total < target && cpu == GetCurrentCpuUnsafe()); + tracking::Report(kFreeTruncations, cl, 1); + return 1; +} + +uint64_t CPUCache::UsedBytes(int target_cpu) const { + ASSERT(target_cpu >= 0); + uint64_t total = 0; + for (int cl = 1; cl < kNumClasses; cl++) { + int size = Static::sizemap()->class_to_size(cl); + total += size * freelist_.Length(target_cpu, cl); + } + return total; +} + +bool CPUCache::HasPopulated(int target_cpu) const { + ASSERT(target_cpu >= 0); + return resize_[target_cpu].populated.load(std::memory_order_relaxed); +} + +PerCPUMetadataState CPUCache::MetadataMemoryUsage() const { + return freelist_.MetadataMemoryUsage(); +} + +uint64_t CPUCache::TotalUsedBytes() const { + uint64_t total = 0; + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + total += UsedBytes(cpu); + } + return total; +} + +uint64_t CPUCache::TotalObjectsOfClass(size_t cl) const { + ASSERT(cl < kNumClasses); + uint64_t total_objects = 0; + if (cl > 0) { + for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); cpu++) { + total_objects += freelist_.Length(cpu, cl); + } + } + return total_objects; +} + +uint64_t CPUCache::Unallocated(int cpu) const { + return resize_[cpu].available.load(std::memory_order_relaxed); +} + +uint64_t CPUCache::CacheLimit() const { + return Parameters::max_per_cpu_cache_size(); +} + +struct DrainContext { + std::atomic *available; + uint64_t bytes; +}; + +static void DrainHandler(void *arg, size_t cl, void **batch, size_t count, + size_t cap) { + DrainContext *ctx = static_cast(arg); + const size_t size = Static::sizemap()->class_to_size(cl); + const size_t batch_length = Static::sizemap()->num_objects_to_move(cl); + ctx->bytes += count * size; + // Drain resets capacity to 0, so return the allocated capacity to that + // CPU's slack. + ctx->available->fetch_add(cap * size, std::memory_order_relaxed); + for (size_t i = 0; i < count; i += batch_length) { + size_t n = std::min(batch_length, count - i); + Static::transfer_cache()[cl].InsertRange(absl::Span(batch + i, n), + n); + } +} + +uint64_t CPUCache::Reclaim(int cpu) { + absl::base_internal::SpinLockHolder h(&resize_[cpu].lock); + + // If we haven't populated this core, freelist_.Drain() will touch the memory + // (for writing) as part of its locking process. Avoid faulting new pages as + // part of a release process. + if (!resize_[cpu].populated.load(std::memory_order_relaxed)) { + return 0; + } + + DrainContext ctx{&resize_[cpu].available, 0}; + freelist_.Drain(cpu, &ctx, DrainHandler); + return ctx.bytes; +} + +void CPUCache::PerClassResizeInfo::Init() { + state_.store(0, std::memory_order_relaxed); +} + +bool CPUCache::PerClassResizeInfo::Update(bool overflow, bool grow, + uint32_t *successive) { + int32_t raw = state_.load(std::memory_order_relaxed); + State state; + memcpy(&state, &raw, sizeof(state)); + const bool overflow_then_underflow = !overflow && state.overflow; + grow |= overflow_then_underflow; + // Reset quiescent ticks for Steal clock algorithm if we are going to grow. 
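+  // For example (illustrative): an underflow that arrives right after an
+  // overflow sets overflow_then_underflow, which forces grow; the code below
+  // then clears quiescent_ticks and, because the direction changed, resets
+  // successive to 0, and the caller grows this size class by a whole batch.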
+ State new_state; + new_state.overflow = overflow; + new_state.quiescent_ticks = grow ? 0 : state.quiescent_ticks; + new_state.successive = overflow == state.overflow ? state.successive + 1 : 0; + memcpy(&raw, &new_state, sizeof(raw)); + state_.store(raw, std::memory_order_relaxed); + *successive = new_state.successive; + return overflow_then_underflow; +} + +uint32_t CPUCache::PerClassResizeInfo::Tick() { + int32_t raw = state_.load(std::memory_order_relaxed); + State state; + memcpy(&state, &raw, sizeof(state)); + state.quiescent_ticks++; + memcpy(&raw, &state, sizeof(raw)); + state_.store(raw, std::memory_order_relaxed); + return state.quiescent_ticks - 1; +} + +static void ActivatePerCPUCaches() { + // RunningOnValgrind is a proxy for "is something intercepting malloc." + // + // If Valgrind, et. al., are in use, TCMalloc isn't in use and we shouldn't + // activate our per-CPU caches. + if (RunningOnValgrind()) { + return; + } + if (Parameters::per_cpu_caches() && subtle::percpu::IsFast()) { + Static::InitIfNecessary(); + Static::cpu_cache()->Activate(); + // no need for this thread cache anymore, I guess. + ThreadCache::BecomeIdle(); + // If there's a problem with this code, let's notice it right away: + ::operator delete(::operator new(1)); + } +} + +class PerCPUInitializer { + public: + PerCPUInitializer() { + ActivatePerCPUCaches(); + } +}; +static PerCPUInitializer module_enter_exit; + +} // namespace tcmalloc + +extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { + return tcmalloc::Static::CPUCacheActive(); +} + +extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { + return tcmalloc::Parameters::max_per_cpu_cache_size(); +} + +extern "C" void MallocExtension_Internal_SetMaxPerCpuCacheSize(int32_t value) { + tcmalloc::Parameters::set_max_per_cpu_cache_size(value); +} diff --git a/tcmalloc/cpu_cache.h b/tcmalloc/cpu_cache.h new file mode 100644 index 000000000..0bce6aa9c --- /dev/null +++ b/tcmalloc/cpu_cache.h @@ -0,0 +1,237 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_CPU_CACHE_H_ +#define TCMALLOC_CPU_CACHE_H_ + +#include +#include + +#include + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/optimization.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/percpu_tcmalloc.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/thread_cache.h" +#include "tcmalloc/tracking.h" + +namespace tcmalloc { + + +class CPUCache { + public: + // tcmalloc explicitly initializes its global state (to be safe for + // use in global constructors) so our constructor must be trivial; + // do all initialization here instead. + void Activate(); + + // Allocate an object of the given size class. 
When allocation fails + // (from this cache and after running Refill), OOOHandler(size) is + // called and its return value is returned from + // Allocate. OOOHandler is used to parameterize out-of-memory + // handling (raising exception, returning nullptr, calling + // new_handler or anything else). "Passing" OOOHandler in this way + // allows Allocate to be used in tail-call position in fast-path, + // making Allocate use jump (tail-call) to slow path code. + template + void *Allocate(size_t cl); + + // Free an object of the given class. + void Deallocate(void *ptr, size_t cl); + + // Give the number of bytes in 's cache + uint64_t UsedBytes(int cpu) const; + + // Whether 's cache has ever been populated with objects + bool HasPopulated(int cpu) const; + + PerCPUMetadataState MetadataMemoryUsage() const; + + // Give the number of bytes used in all cpu caches. + uint64_t TotalUsedBytes() const; + + // Give the number of objects of a given class in all cpu caches. + uint64_t TotalObjectsOfClass(size_t cl) const; + + // Give the number of bytes unallocated to any sizeclass in 's cache. + uint64_t Unallocated(int cpu) const; + + // Give the per-cpu limit of cache size. + uint64_t CacheLimit() const; + + // Empty out the cache on ; move all objects to the central + // cache. (If other threads run concurrently on that cpu, we can't + // guarantee it will be fully empty on return, but if the cpu is + // unused, this will eliminate stranded memory.) Returns the number + // of bytes we sent back. This function is thread safe. + uint64_t Reclaim(int cpu); + + // Determine number of bits we should use for allocating per-cpu cache + // The amount of per-cpu cache is 2 ^ kPerCpuShift +#if defined(TCMALLOC_SMALL_BUT_SLOW) + static const size_t kPerCpuShift = 12; +#else + static const size_t kPerCpuShift = 18; +#endif + + private: + // Per-size-class freelist resizing info. + class PerClassResizeInfo { + public: + void Init(); + // Updates info on overflow/underflow. + // says if it's overflow or underflow. + // is caller approximation of whether we want to grow capacity. + // will contain number of successive overflows/underflows. + // Returns if capacity needs to be grown aggressively (i.e. by batch size). + bool Update(bool overflow, bool grow, uint32_t *successive); + uint32_t Tick(); + + private: + std::atomic state_; + // state_ layout: + struct State { + // last overflow/underflow? + uint32_t overflow : 1; + // number of times Steal checked this class since the last grow + uint32_t quiescent_ticks : 15; + // number of successive overflows/underflows + uint32_t successive : 16; + }; + static_assert(sizeof(State) == sizeof(std::atomic), + "size mismatch"); + }; + + subtle::percpu::TcmallocSlab freelist_; + + struct ResizeInfoUnpadded { + // cache space on this CPU we're not using. Modify atomically; + // we don't want to lose space. + std::atomic available; + // this is just a hint + std::atomic last_steal; + // Track whether we have initialized this CPU. + absl::once_flag initialized; + // Track whether we have ever populated this CPU. + std::atomic populated; + // For cross-cpu operations. + absl::base_internal::SpinLock lock; + PerClassResizeInfo per_class[kNumClasses]; + }; + struct ResizeInfo : ResizeInfoUnpadded { + char pad[ABSL_CACHELINE_SIZE - + sizeof(ResizeInfoUnpadded) % ABSL_CACHELINE_SIZE]; + }; + // Tracking data for each CPU's cache resizing efforts. + ResizeInfo *resize_; + // Track whether we are lazily initializing slabs. 
We cannot use the latest + // value in Parameters, as it can change after initialization. + bool lazy_slabs_; + + struct ObjectClass { + size_t cl; + void *obj; + }; + + void *Refill(int cpu, size_t cl); + + // This is called after finding a full freelist when attempting to push + // on the freelist for sizeclass . The last arg should indicate which + // CPU's list was full. Returns 1. + int Overflow(void *ptr, size_t cl, int cpu); + + // Called on freelist overflow/underflow on to balance cache + // capacity between size classes. Returns number of objects to return/request + // from transfer cache. [0...*returned) will contain objects that + // need to be freed. + size_t UpdateCapacity(int cpu, size_t cl, size_t batch_length, bool overflow, + ObjectClass *to_return, size_t *returned); + + // Tries to obtain up to bytes of freelist space on + // for from other . [0...*returned) will contain objects + // that need to be freed. + void Grow(int cpu, size_t cl, size_t desired_increase, ObjectClass *to_return, + size_t *returned); + + // Tries to steal for on from other size classes on that + // CPU. Returns acquired bytes. [0...*returned) will contain + // objects that need to be freed. + size_t Steal(int cpu, size_t cl, size_t bytes, ObjectClass *to_return, + size_t *returned); + + static void *NoopUnderflow(int cpu, size_t cl) { return nullptr; } + static int NoopOverflow(int cpu, size_t cl, void *item) { return -1; } +}; + +template +inline void *ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Allocate(size_t cl) { + ASSERT(cl > 0); + + tracking::Report(kMallocHit, cl, 1); + struct Helper { + static void *Underflow(int cpu, size_t cl) { + // we've optimistically reported hit in Allocate, lets undo it and + // report miss instead. + tracking::Report(kMallocHit, cl, -1); + tracking::Report(kMallocMiss, cl, 1); + void *ret = Static::cpu_cache()->Refill(cpu, cl); + if (ABSL_PREDICT_FALSE(ret == nullptr)) { + size_t size = Static::sizemap()->class_to_size(cl); + return OOMHandler(size); + } + return ret; + } + }; + return freelist_.Pop(cl, &Helper::Underflow); +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Deallocate(void *ptr, + size_t cl) { + ASSERT(cl > 0); + tracking::Report(kFreeHit, cl, 1); // Be optimistic; correct later if needed. + + struct Helper { + static int Overflow(int cpu, size_t cl, void *ptr) { + // When we reach here we've already optimistically bumped FreeHits. + // Fix that. + tracking::Report(kFreeHit, cl, -1); + tracking::Report(kFreeMiss, cl, 1); + return Static::cpu_cache()->Overflow(ptr, cl, cpu); + } + }; + freelist_.Push(cl, ptr, Helper::Overflow); +} + +inline bool UsePerCpuCache() { + return (Static::CPUCacheActive() && + // We call IsFast() on every non-fastpath'd malloc or free since + // IsFast() has the side-effect of initializing the per-thread state + // needed for "unsafe" per-cpu operations in case this is the first + // time a new thread is calling into tcmalloc. + // + // If the per-CPU cache for a thread is not initialized, we push + // ourselves onto the slow path (if + // !defined(TCMALLOC_DEPRECATED_PERTHREAD)) until this occurs. See + // fast_alloc's use of TryRecordAllocationFast. 
+ subtle::percpu::IsFast()); +} + +}; // namespace tcmalloc +#endif // TCMALLOC_CPU_CACHE_H_ diff --git a/tcmalloc/experiment.cc b/tcmalloc/experiment.cc new file mode 100644 index 000000000..8fe0edb6c --- /dev/null +++ b/tcmalloc/experiment.cc @@ -0,0 +1,157 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/experiment.h" + +#include + +#include "absl/base/macros.h" +#include "absl/strings/str_cat.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +using tcmalloc::internal::kNumExperiments; +using tcmalloc::tcmalloc_internal::thread_safe_getenv; + +namespace tcmalloc { +namespace { + +const char kDelimiter = ','; +const char kExperiments[] = "BORG_EXPERIMENTS"; +const char kDisableExperiments[] = "BORG_DISABLE_EXPERIMENTS"; +const char kDisableAll[] = "all"; + +bool LookupExperimentID(absl::string_view label, Experiment* exp) { + for (auto config : experiments) { + if (config.name == label) { + *exp = config.id; + return true; + } + } + + return false; +} + +const bool* GetSelectedExperiments() { + static bool by_id[kNumExperiments]; + + static const char* active_experiments = thread_safe_getenv(kExperiments); + static const char* disabled_experiments = + thread_safe_getenv(kDisableExperiments); + static const bool* status = internal::SelectExperiments( + by_id, active_experiments ? active_experiments : "", + disabled_experiments ? disabled_experiments : ""); + return status; +} + +template +void ParseExperiments(absl::string_view labels, F f) { + absl::string_view::size_type pos = 0; + do { + absl::string_view token; + auto end = labels.find(kDelimiter, pos); + if (end == absl::string_view::npos) { + token = labels.substr(pos); + pos = end; + } else { + token = labels.substr(pos, end - pos); + pos = end + 1; + } + + f(token); + } while (pos != absl::string_view::npos); +} + +} // namespace + +namespace internal { + +const bool* SelectExperiments(bool* buffer, absl::string_view active, + absl::string_view disabled) { + memset(buffer, 0, sizeof(*buffer) * kNumExperiments); + + ParseExperiments(active, [buffer](absl::string_view token) { + Experiment id; + if (LookupExperimentID(token, &id)) { + buffer[static_cast(id)] = true; + } + }); + + if (disabled == kDisableAll) { + memset(buffer, 0, sizeof(*buffer) * kNumExperiments); + } + + ParseExperiments(disabled, [buffer](absl::string_view token) { + Experiment id; + if (LookupExperimentID(token, &id)) { + buffer[static_cast(id)] = false; + } + }); + + return buffer; +} + +} // namespace internal + +bool IsExperimentActive(Experiment exp) { + ASSERT(static_cast(exp) >= 0); + ASSERT(exp < Experiment::kMaxExperimentID); + + return GetSelectedExperiments()[static_cast(exp)]; +} + +void FillExperimentProperties( + std::map* result) { + for (const auto& config : experiments) { + (*result)[absl::StrCat("tcmalloc.experiment.", config.name)].value = + IsExperimentActive(config.id) ? 
1 : 0; + } +} + +absl::optional FindExperimentByName(absl::string_view name) { + for (const auto& config : experiments) { + if (name == config.name) { + return config.id; + } + } + + return absl::nullopt; +} + +void PrintExperiments(TCMalloc_Printer* printer) { + // Index experiments by their positions in the experiments array, rather than + // by experiment ID. + static bool active[ABSL_ARRAYSIZE(experiments)]; + static const bool* status = []() { + memset(active, 0, sizeof(active)); + const bool* by_id = GetSelectedExperiments(); + + for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) { + const auto& config = experiments[i]; + active[i] = by_id[static_cast(config.id)]; + } + + return active; + }(); + + printer->printf("MALLOC EXPERIMENTS:"); + for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) { + const char* value = status[i] ? "1" : "0"; + printer->printf(" %s=%s", experiments[i].name, value); + } + + printer->printf("\n"); +} + +} // namespace tcmalloc diff --git a/tcmalloc/experiment.h b/tcmalloc/experiment.h new file mode 100644 index 000000000..f4d5f2a23 --- /dev/null +++ b/tcmalloc/experiment.h @@ -0,0 +1,69 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_EXPERIMENT_H_ +#define TCMALLOC_EXPERIMENT_H_ + +#include + +#include +#include + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +// TCMalloc Experiment Controller +// +// This consumes environment variables to decide whether to activate experiments +// to control TCMalloc behavior. It avoids memory allocations when making +// experiment decisions to allow experiments to be used in critical TCMalloc +// initialization paths. +// +// If an experiment is causing difficulty, all experiments can be disabled by +// setting the environment variable: +// BORG_DISABLE_EXPERIMENTS=all *or* +// BORG_DISABLE_EXPERIMENTS=BAD_EXPERIMENT_LABEL + +namespace tcmalloc { +namespace internal { + +constexpr size_t kNumExperiments = + static_cast(Experiment::kMaxExperimentID); + +// SelectExperiments parses the experiments enumerated by active and disabled +// and updates buffer[experiment_id] accordingly. +// +// buffer must be sized for kMaxExperimentID entries. +// +// This is exposed for testing purposes only. 
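+// A test-only usage sketch (hedged; "TCMALLOC_TEMERAIRE" is one of the labels
+// from experiment_config.h and the buffer name is arbitrary):
+//
+//   bool buffer[kNumExperiments];
+//   const bool* selected =
+//       SelectExperiments(buffer, /*active=*/"TCMALLOC_TEMERAIRE",
+//                         /*disabled=*/"");
+//   // selected[static_cast<int>(Experiment::TCMALLOC_TEMERAIRE)] is now true.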
+const bool* SelectExperiments(bool* buffer, absl::string_view active, + absl::string_view disabled); + +} // namespace internal + +bool IsExperimentActive(Experiment exp); + +void FillExperimentProperties( + std::map* result); + +absl::optional FindExperimentByName(absl::string_view name); + +void PrintExperiments(TCMalloc_Printer* printer); + +} // namespace tcmalloc + +#endif // TCMALLOC_EXPERIMENT_H_ diff --git a/tcmalloc/experiment_config.h b/tcmalloc/experiment_config.h new file mode 100644 index 000000000..8d69f7267 --- /dev/null +++ b/tcmalloc/experiment_config.h @@ -0,0 +1,47 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_EXPERIMENT_CONFIG_H_ +#define TCMALLOC_EXPERIMENT_CONFIG_H_ + +namespace tcmalloc { + +enum class Experiment : int { + TCMALLOC_TEMERAIRE, + TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE, + TCMALLOC_SANS_56_SIZECLASS, + TCMALLOC_ARBITRARY_TRANSFER, + TCMALLOC_LARGE_NUM_TO_MOVE, + kMaxExperimentID, +}; + +struct ExperimentConfig { + Experiment id; + const char* name; +}; + +// TODO(b/134687226): Consider making this programmatically generated +constexpr ExperimentConfig experiments[] = { + {Experiment::TCMALLOC_TEMERAIRE, "TCMALLOC_TEMERAIRE"}, + {Experiment::TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE, + "TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE"}, + {Experiment::TCMALLOC_SANS_56_SIZECLASS, "TCMALLOC_SANS_56_SIZECLASS"}, + {Experiment::TCMALLOC_ARBITRARY_TRANSFER, + "TCMALLOC_ARBITRARY_TRANSFER_CACHE"}, + {Experiment::TCMALLOC_LARGE_NUM_TO_MOVE, "TCMALLOC_LARGE_NUM_TO_MOVE"}, +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_EXPERIMENT_CONFIG_H_ diff --git a/tcmalloc/experiment_config_test.cc b/tcmalloc/experiment_config_test.cc new file mode 100644 index 000000000..24da9e64a --- /dev/null +++ b/tcmalloc/experiment_config_test.cc @@ -0,0 +1,31 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/experiment_config.h" + +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace { + +// Verify IDs are non-negative and strictly less than kMaxExperimentID. 
+TEST(ExperimentConfigTest, ValidateIDs) { + for (const auto& exp : experiments) { + ASSERT_LE(0, static_cast(exp.id)); + ASSERT_LT(exp.id, Experiment::kMaxExperimentID); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/experimental_size_classes.cc b/tcmalloc/experimental_size_classes.cc new file mode 100644 index 000000000..3420748df --- /dev/null +++ b/tcmalloc/experimental_size_classes.cc @@ -0,0 +1,682 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +namespace tcmalloc { + +// is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. 
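+// Worked example of the arithmetic above (illustrative): a 50-byte request
+// served from a 64-byte class wastes 64 - 50 = 14 bytes, i.e. 14 / 50 = 28%
+// dynamic overhead; combined with a fixed component of 22%, that is roughly
+// 50 * (0.28 + 0.22) = 25 bytes of overhead per allocation on average.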
+ +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 24, 1, 32}, // 0.68% + { 32, 1, 32}, // 0.59% + { 40, 1, 32}, // 0.98% + { 48, 1, 32}, // 0.98% + { 64, 1, 32}, // 0.59% + { 72, 1, 32}, // 1.28% + { 80, 1, 32}, // 0.98% + { 88, 1, 32}, // 0.68% + { 96, 1, 32}, // 0.98% + { 104, 1, 32}, // 1.58% + { 112, 1, 32}, // 0.78% + { 120, 1, 32}, // 0.98% + { 128, 1, 32}, // 0.59% + { 136, 1, 32}, // 0.98% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 184, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 280, 1, 32}, // 1.48% + { 296, 1, 32}, // 3.10% + { 312, 1, 32}, // 1.58% + { 336, 1, 32}, // 2.18% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 408, 1, 32}, // 0.98% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 131072, 16, 2}, // 0.04% + { 139264, 17, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 24, 1, 32}, // 0.17% + { 32, 1, 32}, // 0.15% + { 40, 1, 32}, // 0.17% + { 48, 1, 32}, // 0.24% + { 64, 1, 32}, // 0.15% + { 72, 1, 32}, // 0.17% + { 80, 1, 32}, // 0.29% + { 88, 1, 32}, // 0.24% + { 96, 1, 32}, // 0.24% + { 104, 1, 32}, // 0.17% + { 112, 1, 32}, // 0.34% + { 120, 1, 32}, // 0.17% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 184, 1, 32}, // 0.20% + { 200, 1, 32}, // 0.66% + { 224, 1, 32}, // 0.34% + { 256, 1, 32}, // 0.15% + { 280, 1, 32}, // 0.17% + { 304, 1, 32}, // 0.89% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 424, 1, 32}, // 0.51% + { 464, 1, 32}, // 1.03% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 
32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 960, 1, 32}, // 0.54% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1664, 1, 32}, // 3.80% + { 1920, 1, 32}, // 0.54% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 14336, 4, 4}, // 1.62% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 27264, 5, 2}, // 0.19% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 49152, 3, 2}, // 0.05% + { 54528, 5, 2}, // 0.19% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 24, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 40, 1, 32}, // 0.03% + { 48, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 72, 1, 32}, // 0.04% + { 80, 1, 32}, // 0.04% + { 88, 1, 32}, // 0.05% + { 96, 1, 32}, // 0.04% + { 104, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 184, 1, 32}, // 0.07% + { 200, 1, 32}, // 0.07% + { 224, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 288, 1, 32}, // 0.04% + { 312, 1, 32}, // 0.04% + { 344, 1, 32}, // 0.02% + { 416, 1, 32}, // 0.04% + { 472, 1, 32}, // 0.09% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2560, 1, 25}, // 0.41% + { 2944, 1, 22}, // 0.07% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 4096, 1, 16}, // 0.02% + { 4352, 1, 15}, // 0.41% + { 4736, 1, 13}, // 0.66% + { 5120, 1, 12}, // 0.41% + { 5376, 1, 12}, // 1.61% + { 5760, 1, 11}, // 1.15% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 7040, 1, 9}, // 0.66% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 9984, 1, 6}, // 1.00% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 18688, 1, 3}, // 0.21% + { 20096, 1, 3}, // 0.36% + { 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 45568, 2, 2}, // 4.61% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 
0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 24, 1, 32}, // 1.57% + { 32, 1, 32}, // 1.17% + { 40, 1, 32}, // 1.57% + { 48, 1, 32}, // 1.57% + { 64, 1, 32}, // 1.17% + { 72, 1, 32}, // 2.78% + { 80, 1, 32}, // 1.57% + { 88, 1, 32}, // 2.37% + { 96, 1, 32}, // 2.78% + { 104, 1, 32}, // 2.17% + { 112, 1, 32}, // 2.78% + { 120, 1, 32}, // 1.57% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 168, 1, 32}, // 2.78% + { 184, 1, 32}, // 2.37% + { 200, 1, 32}, // 3.60% + { 240, 1, 32}, // 1.57% + { 272, 1, 32}, // 1.57% + { 288, 1, 32}, // 2.78% + { 312, 1, 32}, // 2.17% + { 336, 1, 32}, // 2.78% + { 408, 1, 32}, // 1.57% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 4096, 4, 16}, // 0.29% + { 4736, 5, 13}, // 8.36% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 48, 1, 32}, // 0.98% + { 64, 1, 32}, // 0.59% + { 80, 1, 32}, // 0.98% + { 96, 1, 32}, // 0.98% + { 112, 1, 32}, // 0.78% + { 128, 1, 32}, // 0.59% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 304, 1, 32}, // 4.25% + { 320, 1, 32}, // 3.00% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 400, 1, 32}, // 3.00% + { 416, 1, 32}, // 4.25% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 
57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 139264, 17, 2}, // 0.03% + { 155648, 19, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 196608, 24, 2}, // 0.02% + { 212992, 26, 2}, // 0.02% + { 229376, 28, 2}, // 0.02% + { 245760, 30, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 48, 1, 32}, // 0.24% + { 64, 1, 32}, // 0.15% + { 80, 1, 32}, // 0.29% + { 96, 1, 32}, // 0.24% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 272, 1, 32}, // 0.54% + { 288, 1, 32}, // 0.84% + { 304, 1, 32}, // 0.89% + { 336, 1, 32}, // 0.69% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 480, 1, 32}, // 0.54% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 960, 1, 32}, // 0.54% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1664, 1, 32}, // 3.80% + { 1920, 1, 32}, // 0.54% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 14336, 4, 4}, // 1.62% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 26112, 4, 2}, // 0.43% + { 28672, 7, 2}, // 0.02% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 54528, 5, 2}, // 0.19% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 48, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 80, 1, 32}, // 0.04% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 224, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 288, 1, 32}, // 0.04% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 352, 1, 32}, // 0.12% + { 384, 1, 32}, // 0.12% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 
32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2560, 1, 25}, // 0.41% + { 2944, 1, 22}, // 0.07% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 4096, 1, 16}, // 0.02% + { 4352, 1, 15}, // 0.41% + { 4736, 1, 13}, // 0.66% + { 5120, 1, 12}, // 0.41% + { 5376, 1, 12}, // 1.61% + { 5760, 1, 11}, // 1.15% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 7040, 1, 9}, // 0.66% + { 7424, 1, 8}, // 0.91% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 9984, 1, 6}, // 1.00% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 13696, 1, 4}, // 0.76% + { 14464, 1, 4}, // 0.71% + { 15360, 1, 4}, // 0.41% + { 16384, 1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 18688, 1, 3}, // 0.21% + { 20096, 1, 3}, // 0.36% + { 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 45568, 2, 2}, // 4.61% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kExperimentalSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 48, 1, 32}, // 1.57% + { 64, 1, 32}, // 1.17% + { 80, 1, 32}, // 1.57% + { 96, 1, 32}, // 2.78% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 224, 1, 32}, // 2.78% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 288, 1, 32}, // 2.78% + { 304, 1, 32}, // 4.86% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 400, 1, 32}, // 3.60% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 3584, 7, 18}, // 0.17% + { 4096, 4, 16}, // 0.29% + { 4736, 5, 13}, // 8.36% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#endif +// clang-format on + +} // namespace tcmalloc diff --git a/tcmalloc/guarded_page_allocator.cc b/tcmalloc/guarded_page_allocator.cc new file mode 100644 index 000000000..1f8bc1b92 --- /dev/null +++ b/tcmalloc/guarded_page_allocator.cc @@ -0,0 +1,517 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/guarded_page_allocator.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "absl/base/call_once.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/debugging/stacktrace.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/bits.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/system-alloc.h" + +namespace tcmalloc { + +const size_t GuardedPageAllocator::kMagicSize; // NOLINT + +void GuardedPageAllocator::Init(size_t max_alloced_pages, size_t total_pages) { + CHECK_CONDITION(max_alloced_pages > 0); + CHECK_CONDITION(max_alloced_pages <= total_pages); + CHECK_CONDITION(total_pages <= kGpaMaxPages); + max_alloced_pages_ = max_alloced_pages; + total_pages_ = total_pages; + + // If the system page size is larger than kPageSize, we need to use the + // system page size for this allocator since mprotect operates on full pages + // only. This case happens on PPC. + page_size_ = std::max(kPageSize, static_cast(getpagesize())); + ASSERT(page_size_ % kPageSize == 0); + + rand_ = reinterpret_cast(this); // Initialize RNG seed. + MapPages(); +} + +void GuardedPageAllocator::Destroy() { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + if (initialized_) { + size_t len = pages_end_addr_ - pages_base_addr_; + int err = munmap(reinterpret_cast(pages_base_addr_), len); + ASSERT(err != -1); + (void)err; + initialized_ = false; + } +} + +void *GuardedPageAllocator::Allocate(size_t size, size_t alignment) { + if (size == 0) return nullptr; + ssize_t free_slot = ReserveFreeSlot(); + if (free_slot == -1) return nullptr; // All slots are reserved. + + ASSERT(size <= page_size_); + ASSERT(alignment <= page_size_); + ASSERT((alignment & (alignment - 1)) == 0); + void *result = reinterpret_cast(SlotToAddr(free_slot)); + if (mprotect(result, page_size_, PROT_READ | PROT_WRITE) == -1) { + ASSERT(false && "mprotect failed"); + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + num_failed_allocations_++; + FreeSlot(free_slot); + return nullptr; + } + + // Place some allocations at end of page for better overflow detection. + MaybeRightAlign(free_slot, size, alignment, &result); + + // Record stack trace. 
+ SlotMetadata &d = data_[free_slot]; + d.dealloc_trace.depth = 0; + d.alloc_trace.depth = absl::GetStackTrace(d.alloc_trace.stack, kMaxStackDepth, + /*skip_count=*/3); + d.alloc_trace.tid = absl::base_internal::GetTID(); + d.requested_size = size; + d.allocation_start = reinterpret_cast(result); + + ASSERT(!alignment || d.allocation_start % alignment == 0); + return result; +} + +void GuardedPageAllocator::Deallocate(void *ptr) { + ASSERT(PointerIsMine(ptr)); + const uintptr_t page_addr = GetPageAddr(reinterpret_cast(ptr)); + size_t slot = AddrToSlot(page_addr); + + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + if (IsFreed(slot)) { + double_free_detected_ = true; + } else if (WriteOverflowOccurred(slot)) { + write_overflow_detected_ = true; + } + + CHECK_CONDITION(mprotect(reinterpret_cast(page_addr), page_size_, + PROT_NONE) != -1); + + if (write_overflow_detected_ || double_free_detected_) { + *reinterpret_cast(ptr) = 'X'; // Trigger SEGV handler. + CHECK_CONDITION(false); // Unreachable. + } + + // Record stack trace. + GpaStackTrace &trace = data_[slot].dealloc_trace; + trace.depth = absl::GetStackTrace(trace.stack, kMaxStackDepth, + /*skip_count=*/2); + trace.tid = absl::base_internal::GetTID(); + + FreeSlot(slot); +} + +size_t GuardedPageAllocator::GetRequestedSize(const void *ptr) const { + ASSERT(PointerIsMine(ptr)); + size_t slot = AddrToSlot(GetPageAddr(reinterpret_cast(ptr))); + return data_[slot].requested_size; +} + +std::pair GuardedPageAllocator::GetAllocationOffsetAndSize( + const void *ptr) const { + ASSERT(PointerIsMine(ptr)); + const uintptr_t addr = reinterpret_cast(ptr); + const size_t slot = GetNearestSlot(addr); + return {addr - data_[slot].allocation_start, data_[slot].requested_size}; +} + +GuardedPageAllocator::ErrorType GuardedPageAllocator::GetStackTraces( + const void *ptr, GpaStackTrace *alloc_trace, + GpaStackTrace *dealloc_trace) const { + ASSERT(PointerIsMine(ptr)); + const uintptr_t addr = reinterpret_cast(ptr); + size_t slot = GetNearestSlot(addr); + *alloc_trace = data_[slot].alloc_trace; + *dealloc_trace = data_[slot].dealloc_trace; + return GetErrorType(addr, alloc_trace->depth, dealloc_trace->depth); +} + +// We take guarded samples during periodic profiling samples. Computes the +// mean number of profiled samples made for every guarded sample. 
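// Worked example (editorial note, not part of the original file): the chained
// rate computed below is the number of ordinary heap-profile samples taken per
// guarded (GWP-ASan) sample. With hypothetical parameter values
// profile_sampling_rate = 2 MiB and guarded_sampling_rate = 32 MiB, the result
// is ceil(32 / 2) = 16, i.e. roughly one in every 16 profiled allocations is
// also guarded. A negative guarded rate is returned unchanged so that a
// "disabled" setting propagates through unmodified.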
+static int GetChainedRate() { + auto guarded_rate = Parameters::guarded_sampling_rate(); + auto sample_rate = Parameters::profile_sampling_rate(); + if (guarded_rate < 0 || sample_rate <= 0) { + return guarded_rate; + } else { + return std::ceil(static_cast(guarded_rate) / + static_cast(sample_rate)); + } +} + +void GuardedPageAllocator::Print(TCMalloc_Printer *out) { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + out->printf( + "\n" + "------------------------------------------------\n" + "GWP-ASan Status\n" + "------------------------------------------------\n" + "Successful Allocations: %zu\n" + "Failed Allocations: %zu\n" + "Slots Currently Allocated: %zu\n" + "Slots Currently Quarantined: %zu\n" + "Maximum Slots Allocated: %zu / %zu\n" + "PARAMETER tcmalloc_guarded_sample_parameter %d\n", + num_allocation_requests_ - num_failed_allocations_, + num_failed_allocations_, num_alloced_pages_, + total_pages_ - num_alloced_pages_, num_alloced_pages_max_, + max_alloced_pages_, GetChainedRate()); +} + +void GuardedPageAllocator::PrintInPbtxt(PbtxtRegion *gwp_asan) const { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + gwp_asan->PrintI64("successful_allocations", + num_allocation_requests_ - num_failed_allocations_); + gwp_asan->PrintI64("failed_allocations", num_failed_allocations_); + gwp_asan->PrintI64("current_slots_allocated", num_alloced_pages_); + gwp_asan->PrintI64("current_slots_quarantined", + total_pages_ - num_alloced_pages_); + gwp_asan->PrintI64("max_slots_allocated", num_alloced_pages_max_); + gwp_asan->PrintI64("allocated_slot_limit", max_alloced_pages_); + gwp_asan->PrintI64("tcmalloc_guarded_sample_parameter", GetChainedRate()); +} + +// Maps 2 * total_pages_ + 1 pages so that there are total_pages_ unique pages +// we can return from Allocate with guard pages before and after them. +void GuardedPageAllocator::MapPages() { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + ASSERT(!first_page_addr_); + ASSERT(page_size_ % getpagesize() == 0); + size_t len = (2 * total_pages_ + 1) * page_size_; + auto base_addr = reinterpret_cast( + MmapAligned(len, page_size_, /*tagged=*/true)); + ASSERT(base_addr); + if (!base_addr) return; + + // Tell TCMalloc's PageMap about the memory we own. + const PageID page = base_addr >> kPageShift; + const Length page_len = len >> kPageShift; + if (!Static::pagemap()->Ensure(page, page_len)) { + ASSERT(false && "Failed to notify page map of page-guarded memory."); + return; + } + + // Allocate memory for slot metadata. + data_ = reinterpret_cast( + Static::arena()->Alloc(sizeof(*data_) * total_pages_)); + + pages_base_addr_ = base_addr; + pages_end_addr_ = pages_base_addr_ + len; + + // Align first page to page_size_. + first_page_addr_ = GetPageAddr(pages_base_addr_ + page_size_); + + std::fill_n(free_pages_, total_pages_, true); + initialized_ = true; +} + +// Selects a random slot in O(total_pages_) time. 
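// Illustrative, self-contained sketch (editorial addition, not part of the
// original file) of the linear "i-th free slot" scan that ReserveFreeSlot()
// and GetIthFreeSlot() below rely on. The helper name and raw array parameter
// are hypothetical; the real code walks the free_pages_ member while holding
// guarded_page_lock.
#include <cstddef>

size_t IthFreeSlotSketch(const bool *free_pages, size_t i) {
  // Precondition: the array holds at least i + 1 entries that are true.
  for (size_t j = 0, seen = 0;; ++j) {
    if (free_pages[j]) {
      if (seen == i) return j;
      ++seen;
    }
  }
}

// Example: for {false, true, true, false, true} and i == 2, the scan returns
// index 4 (the third free slot), so a uniformly random i drawn from
// [0, num_free_pages) yields a uniformly random free slot.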
+ssize_t GuardedPageAllocator::ReserveFreeSlot() { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + if (!initialized_ || !allow_allocations_) return -1; + num_allocation_requests_++; + if (num_alloced_pages_ == max_alloced_pages_) { + num_failed_allocations_++; + return -1; + } + + rand_ = Sampler::NextRandom(rand_); + size_t num_free_pages = total_pages_ - num_alloced_pages_; + size_t slot = GetIthFreeSlot(rand_ % num_free_pages); + ASSERT(free_pages_[slot]); + free_pages_[slot] = false; + num_alloced_pages_++; + num_alloced_pages_max_ = std::max(num_alloced_pages_, num_alloced_pages_max_); + return slot; +} + +size_t GuardedPageAllocator::GetIthFreeSlot(size_t ith_free_slot) { + ASSERT(ith_free_slot < total_pages_ - num_alloced_pages_); + for (size_t free_slot_count = 0, j = 0;; j++) { + if (free_pages_[j]) { + if (free_slot_count == ith_free_slot) return j; + free_slot_count++; + } + } +} + +void GuardedPageAllocator::FreeSlot(size_t slot) { + ASSERT(slot < total_pages_); + ASSERT(!free_pages_[slot]); + free_pages_[slot] = true; + num_alloced_pages_--; +} + +uintptr_t GuardedPageAllocator::GetPageAddr(uintptr_t addr) const { + const uintptr_t addr_mask = ~(page_size_ - 1ULL); + return addr & addr_mask; +} + +uintptr_t GuardedPageAllocator::GetNearestValidPage(uintptr_t addr) const { + if (addr < first_page_addr_) return first_page_addr_; + const uintptr_t last_page_addr = + first_page_addr_ + 2 * (total_pages_ - 1) * page_size_; + if (addr > last_page_addr) return last_page_addr; + uintptr_t offset = addr - first_page_addr_; + + // If addr is already on a valid page, just return addr. + if ((offset / page_size_) % 2 == 0) return addr; + + // ptr points to a guard page, so get nearest valid page. + const size_t kHalfPageSize = page_size_ / 2; + if ((offset / kHalfPageSize) % 2 == 0) { + return addr - kHalfPageSize; // Round down. + } + return addr + kHalfPageSize; // Round up. +} + +size_t GuardedPageAllocator::GetNearestSlot(uintptr_t addr) const { + return AddrToSlot(GetPageAddr(GetNearestValidPage(addr))); +} + +bool GuardedPageAllocator::IsFreed(size_t slot) const { + return free_pages_[slot]; +} + +bool GuardedPageAllocator::WriteOverflowOccurred(size_t slot) const { + if (!ShouldRightAlign(slot)) return false; + uint8_t magic = GetWriteOverflowMagic(slot); + uintptr_t alloc_end = + data_[slot].allocation_start + data_[slot].requested_size; + uintptr_t page_end = SlotToAddr(slot) + page_size_; + uintptr_t magic_end = std::min(page_end, alloc_end + kMagicSize); + for (uintptr_t p = alloc_end; p < magic_end; ++p) { + if (*reinterpret_cast(p) != magic) return true; + } + return false; +} + +GuardedPageAllocator::ErrorType GuardedPageAllocator::GetErrorType( + uintptr_t addr, uintptr_t alloc_trace_depth, + uintptr_t dealloc_trace_depth) const { + if (!alloc_trace_depth) return ErrorType::kUnknown; + if (double_free_detected_) return ErrorType::kDoubleFree; + if (write_overflow_detected_) return ErrorType::kBufferOverflowOnDealloc; + if (dealloc_trace_depth) return ErrorType::kUseAfterFree; + if (addr < first_page_addr_) return ErrorType::kBufferUnderflow; + const uintptr_t last_page_addr = + first_page_addr_ + 2 * (total_pages_ - 1) * page_size_; + if (addr > last_page_addr) return ErrorType::kBufferOverflow; + + const uintptr_t offset = addr - first_page_addr_; + if ((offset / page_size_) % 2 == 0) return ErrorType::kUnknown; + + const size_t kHalfPageSize = page_size_ / 2; + return (offset / kHalfPageSize) % 2 == 0 ? 
ErrorType::kBufferOverflow + : ErrorType::kBufferUnderflow; +} + +uintptr_t GuardedPageAllocator::SlotToAddr(size_t slot) const { + ASSERT(slot < total_pages_); + return first_page_addr_ + 2 * slot * page_size_; +} + +size_t GuardedPageAllocator::AddrToSlot(uintptr_t addr) const { + uintptr_t offset = addr - first_page_addr_; + ASSERT(offset % page_size_ == 0); + ASSERT((offset / page_size_) % 2 == 0); + int slot = offset / page_size_ / 2; + ASSERT(slot >= 0 && slot < total_pages_); + return slot; +} + +void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size, + size_t alignment, void **ptr) { + if (!ShouldRightAlign(slot)) return; + uintptr_t adjusted_ptr = + reinterpret_cast(*ptr) + page_size_ - size; + + // If alignment == 0, the necessary alignment is never larger than the size + // rounded up to the next power of 2. We use this fact to minimize alignment + // padding between the end of small allocations and their guard pages. For + // allocations larger than kAlignment, we're safe aligning to kAlignment. + size_t default_alignment = std::min( + size_t{1} << tcmalloc_internal::Bits::Log2Ceiling(size), kAlignment); + + // Ensure valid alignment. + alignment = std::max(alignment, default_alignment); + uintptr_t alignment_padding = adjusted_ptr & (alignment - 1); + adjusted_ptr -= alignment_padding; + + // Write magic bytes in alignment padding to detect small overflow writes. + size_t magic_size = std::min(alignment_padding, kMagicSize); + memset(reinterpret_cast(adjusted_ptr + size), + GetWriteOverflowMagic(slot), magic_size); + *ptr = reinterpret_cast(adjusted_ptr); +} +// +// error contains the type of error to record. +// +// Note that we cannot use ::testing::Test::RecordProperty() because it doesn't +// write the XML file if a test crashes (which we're about to do here). So we +// write directly to the XML file instead. +static void RecordCrash(absl::string_view error) { + + const char *xml_file = + tcmalloc::tcmalloc_internal::thread_safe_getenv("XML_OUTPUT_FILE"); + if (!xml_file) return; // Not a gUnit test. + + // Record test failure for Sponge. + constexpr char xml_text_header[] = + "" + "" + " " + " " + " " + " " + " GWP-ASan detected a memory error. See the test log for full report." + " " + ""; + + int fd = open(xml_file, O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (fd == -1) return; + (void)write(fd, xml_text_header, sizeof(xml_text_header) - 1); + (void)write(fd, error.data(), error.size()); + (void)write(fd, xml_text_footer, sizeof(xml_text_footer) - 1); + close(fd); +} + +static struct sigaction old_sa; + +static void ForwardSignal(int signo, siginfo_t *info, void *context) { + if (old_sa.sa_flags & SA_SIGINFO) { + old_sa.sa_sigaction(signo, info, context); + } else if (old_sa.sa_handler == SIG_DFL) { + // No previous handler registered. Re-raise signal for core dump. + int err = sigaction(signo, &old_sa, nullptr); + if (err == -1) { + Log(kLog, __FILE__, __LINE__, "Couldn't restore previous sigaction!"); + } + raise(signo); + } else if (old_sa.sa_handler == SIG_IGN) { + return; // Previous sigaction ignored signal, so do the same. + } else { + old_sa.sa_handler(signo); + } +} + +// A SEGV handler that prints stack traces for the allocation and deallocation +// of relevant memory as well as the location of the memory error. 
+static void SegvHandler(int signo, siginfo_t *info, void *context) { + if (signo != SIGSEGV) return; + void *fault = info->si_addr; + if (!Static::guardedpage_allocator()->PointerIsMine(fault)) return; + GuardedPageAllocator::GpaStackTrace alloc_trace, dealloc_trace; + GuardedPageAllocator::ErrorType error = + Static::guardedpage_allocator()->GetStackTraces(fault, &alloc_trace, + &dealloc_trace); + if (error == GuardedPageAllocator::ErrorType::kUnknown) return; + pid_t current_thread = absl::base_internal::GetTID(); + off_t offset; + size_t size; + std::tie(offset, size) = + Static::guardedpage_allocator()->GetAllocationOffsetAndSize(fault); + + Log(kLog, __FILE__, __LINE__, + "*** GWP-ASan has detected a memory error ***"); + Log(kLog, __FILE__, __LINE__, ">>> Access at offset", offset, + "into buffer of length", size); + Log(kLog, __FILE__, __LINE__, + "Error originates from memory allocated in thread", alloc_trace.tid, + "at:"); + + switch (error) { + case GuardedPageAllocator::ErrorType::kUseAfterFree: + Log(kLog, __FILE__, __LINE__, "The memory was freed in thread", + dealloc_trace.tid, "at:"); + Log(kLog, __FILE__, __LINE__, "Use-after-free occurs in thread", + current_thread, "at:"); + RecordCrash("use-after-free"); + break; + case GuardedPageAllocator::ErrorType::kBufferUnderflow: + Log(kLog, __FILE__, __LINE__, "Buffer underflow occurs in thread", + current_thread, "at:"); + RecordCrash("buffer-underflow"); + break; + case GuardedPageAllocator::ErrorType::kBufferOverflow: + Log(kLog, __FILE__, __LINE__, "Buffer overflow occurs in thread", + current_thread, "at:"); + RecordCrash("buffer-overflow"); + break; + case GuardedPageAllocator::ErrorType::kDoubleFree: + Log(kLog, __FILE__, __LINE__, "The memory was freed in thread", + dealloc_trace.tid, "at:"); + Log(kLog, __FILE__, __LINE__, "Double free occurs in thread", + current_thread, "at:"); + RecordCrash("double-free"); + break; + case GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc: + Log(kLog, __FILE__, __LINE__, + "Buffer overflow (write) detected in thread", current_thread, + "at free:"); + RecordCrash("buffer-overflow-detected-at-free"); + break; + case GuardedPageAllocator::ErrorType::kUnknown: + Log(kCrash, __FILE__, __LINE__, "Unexpected ErrorType::kUnknown"); + } + if (error == GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc) { + Log(kLog, __FILE__, __LINE__, + "*** Try rerunning with --config=asan to get stack trace of overflow " + "***"); + } + + ForwardSignal(signo, info, context); +} + +ABSL_ATTRIBUTE_WEAK void SetDefaultGuardedSamplingRate(); + +extern "C" void MallocExtension_Internal_ActivateGuardedSampling() { + static absl::once_flag flag; + absl::call_once(flag, []() { + struct sigaction action = {}; + action.sa_sigaction = SegvHandler; + sigemptyset(&action.sa_mask); + action.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &action, &old_sa); + Static::guardedpage_allocator()->AllowAllocations(); + if (SetDefaultGuardedSamplingRate) { + SetDefaultGuardedSamplingRate(); + } + }); +} + +} // namespace tcmalloc diff --git a/tcmalloc/guarded_page_allocator.h b/tcmalloc/guarded_page_allocator.h new file mode 100644 index 000000000..8ee4165ba --- /dev/null +++ b/tcmalloc/guarded_page_allocator.h @@ -0,0 +1,305 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ +#define TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ + +#include +#include +#include + +#include + +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +extern absl::base_internal::SpinLock guarded_page_lock; + +// An allocator that gives each allocation a new region, with guard pages on +// either side of the allocated region. If a buffer is overflowed to the next +// guard page or underflowed to the previous guard page, a segfault occurs. +// After an allocation is freed, the underlying page is marked as inaccessible, +// and any future accesses to it will also cause segfaults until the page is +// reallocated. +// +// Is safe to use with static storage duration and is thread safe with the +// exception of calls to Init() and Destroy() (see corresponding function +// comments). +// +// SYNCHRONIZATION +// Requires the SpinLock guarded_page_lock to be defined externally. This is +// required so that this class may be instantiated with static storage +// duration. The lock is held by this class during initialization and when +// accessing the internal free page map. +// +// Example: +// absl::base_internal::SpinLock +// guarded_page_lock(absl::base_internal::kLinkerInitialized); +// GuardedPageAllocator gpa; +// +// void foo() { +// char *buf = reinterpret_cast(gpa.Allocate(8000, 1)); +// buf[0] = 'A'; // OK. No segfault occurs. +// memset(buf, 'A', 8000); // OK. No segfault occurs. +// buf[-300] = 'A'; // Segfault! +// buf[9000] = 'A'; // Segfault! +// gpa.Deallocate(buf); +// buf[0] = 'B'; // Segfault! +// } +// +// int main() { +// // Call Init() only once. +// gpa.Init(64, GuardedPageAllocator::kGpaMaxPages); +// gpa.AllowAllocations(); +// for (int i = 0; i < 1000; i++) foo(); +// return 0; +// } +class GuardedPageAllocator { + public: + struct GpaStackTrace { + void *stack[kMaxStackDepth]; + size_t depth; + pid_t tid; + }; + + // Maximum number of pages this class can allocate. 
+ static constexpr size_t kGpaMaxPages = 512; + + enum class ErrorType { + kUseAfterFree, + kBufferUnderflow, + kBufferOverflow, + kDoubleFree, + kBufferOverflowOnDealloc, + kUnknown, + }; + + constexpr GuardedPageAllocator() + : free_pages_{}, + num_alloced_pages_(0), + num_alloced_pages_max_(0), + num_allocation_requests_(0), + num_failed_allocations_(0), + data_(nullptr), + pages_base_addr_(0), + pages_end_addr_(0), + first_page_addr_(0), + max_alloced_pages_(0), + total_pages_(0), + page_size_(0), + rand_(0), + initialized_(false), + allow_allocations_(false), + double_free_detected_(false), + write_overflow_detected_(false) {} + + GuardedPageAllocator(const GuardedPageAllocator &) = delete; + GuardedPageAllocator &operator=(const GuardedPageAllocator &) = delete; + + ~GuardedPageAllocator() = default; + + // Configures this allocator to allocate up to max_alloced_pages pages at a + // time from a pool of total_pages pages, where: + // 1 <= max_alloced_pages <= total_pages <= kGpaMaxPages + // + // This method should be called non-concurrently and only once to complete + // initialization. Dynamic initialization is deliberately done here and not + // in the constructor, thereby allowing the constructor to be constexpr and + // avoiding static initialization order issues. + void Init(size_t max_alloced_pages, size_t total_pages) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Unmaps memory allocated by this class. + // + // This method should be called non-concurrently and only once to complete + // destruction. Destruction is deliberately done here and not in the + // destructor, thereby allowing the destructor to be trivial (i.e. a no-op) + // and avoiding use-after-destruction issues for static/global instances. + void Destroy(); + + // On success, returns a pointer to size bytes of page-guarded memory, aligned + // to alignment. On failure, returns nullptr. The returned pointer is + // guaranteed to be tagged. Failure can occur if memory could not be mapped + // or protected, if all guarded pages are already allocated, or if size is 0. + // + // Precondition: size and alignment <= page_size_ + // Precondition: alignment is 0 or a power of 2 + void *Allocate(size_t size, size_t alignment) + LOCKS_EXCLUDED(guarded_page_lock); + + // Deallocates memory pointed to by ptr. ptr must have been previously + // returned by a call to Allocate. + void Deallocate(void *ptr) LOCKS_EXCLUDED(guarded_page_lock); + + // Returns the size requested when ptr was allocated. ptr must have been + // previously returned by a call to Allocate. + size_t GetRequestedSize(const void *ptr) const; + + // Returns ptr's offset from the beginning of its allocation along with the + // allocation's size. + std::pair GetAllocationOffsetAndSize(const void *ptr) const; + + // Records stack traces in alloc_trace and dealloc_trace for the page nearest + // to ptr. alloc_trace is the trace at the time the page was allocated. If + // the page is still allocated, dealloc_trace->depth will be 0. If the page + // has been deallocated, dealloc_trace is the trace at the time the page was + // deallocated. + // + // Returns the likely error type for an access at ptr. + // + // Requires that ptr points to memory mapped by this class. + ErrorType GetStackTraces(const void *ptr, GpaStackTrace *alloc_trace, + GpaStackTrace *dealloc_trace) const; + + // Writes a human-readable summary of GuardedPageAllocator's internal state to + // *out. 
+ void Print(TCMalloc_Printer *out) LOCKS_EXCLUDED(guarded_page_lock); + void PrintInPbtxt(PbtxtRegion *gwp_asan) const + LOCKS_EXCLUDED(guarded_page_lock); + + // Returns true if ptr points to memory managed by this class. + inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE + PointerIsMine(const void *ptr) const { + uintptr_t addr = reinterpret_cast(ptr); + return pages_base_addr_ <= addr && addr < pages_end_addr_; + } + + // Allows Allocate() to start returning allocations. + void AllowAllocations() LOCKS_EXCLUDED(guarded_page_lock) { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + allow_allocations_ = true; + } + + private: + // Structure for storing data about a slot. + struct SlotMetadata { + GpaStackTrace alloc_trace; + GpaStackTrace dealloc_trace; + size_t requested_size; + uintptr_t allocation_start; + }; + + // Max number of magic bytes we use to detect write-overflows at deallocation. + static constexpr size_t kMagicSize = 32; + + // Maps pages into memory. + void MapPages() LOCKS_EXCLUDED(guarded_page_lock) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Reserves and returns a slot randomly selected from the free slots in + // free_pages_. Returns -1 if no slots available, or if AllowAllocations() + // hasn't been called yet. + ssize_t ReserveFreeSlot() LOCKS_EXCLUDED(guarded_page_lock); + + // Returns the i-th free slot of free_pages_. i must be in the range [0, + // total_pages_ - num_alloced_pages_). + size_t GetIthFreeSlot(size_t i) EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + + // Marks the specified slot as unreserved. + void FreeSlot(size_t slot) EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + + // Returns the address of the page that addr resides on. + uintptr_t GetPageAddr(uintptr_t addr) const; + + // Returns an address somewhere on the valid page nearest to addr. + uintptr_t GetNearestValidPage(uintptr_t addr) const; + + // Returns the slot number for the page nearest to addr. + size_t GetNearestSlot(uintptr_t addr) const; + + // Returns true if the specified slot has already been freed. + bool IsFreed(size_t slot) const EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + + // Returns true if magic bytes for slot were overwritten. + bool WriteOverflowOccurred(size_t slot) const; + + // Returns the likely error type for the given trace depths and access + // address. + ErrorType GetErrorType(uintptr_t addr, uintptr_t alloc_trace_depth, + uintptr_t dealloc_trace_depth) const; + + // Magic constant used for detecting write-overflows at deallocation time. + static uint8_t GetWriteOverflowMagic(size_t slot) { + // Only even slots get magic bytes, so use slot / 2 for more unique magics. + return uint8_t{0xcd} * static_cast(slot / 2); + } + + // Returns true if slot should be right aligned. + static bool ShouldRightAlign(size_t slot) { return slot % 2 == 0; } + + // If slot is marked for right alignment, moves the allocation in *ptr to the + // right end of the slot, maintaining the specified size and alignment. Magic + // bytes are written in any alignment padding. + void MaybeRightAlign(size_t slot, size_t size, size_t alignment, void **ptr); + + uintptr_t SlotToAddr(size_t slot) const; + size_t AddrToSlot(uintptr_t addr) const; + + // Maps each bool to one page. + // true: Free. false: Reserved. + bool free_pages_[kGpaMaxPages] GUARDED_BY(guarded_page_lock); + + // Number of currently-allocated pages. + size_t num_alloced_pages_ GUARDED_BY(guarded_page_lock); + + // The high-water mark for num_alloced_pages_. 
+ size_t num_alloced_pages_max_ GUARDED_BY(guarded_page_lock); + + // Number of calls to Allocate. + size_t num_allocation_requests_ GUARDED_BY(guarded_page_lock); + + // Number of times Allocate has failed. + size_t num_failed_allocations_ GUARDED_BY(guarded_page_lock); + + // A dynamically-allocated array of stack trace data captured when each page + // is allocated/deallocated. Printed by the SEGV handler when a memory error + // is detected. + SlotMetadata *data_; + + uintptr_t pages_base_addr_; // Points to start of mapped region. + uintptr_t pages_end_addr_; // Points to the end of mapped region. + uintptr_t first_page_addr_; // Points to first page returnable by Allocate. + size_t max_alloced_pages_; // Max number of pages to allocate at once. + size_t total_pages_; // Size of the page pool to allocate from. + size_t page_size_; // Size of pages we allocate. + uint64_t rand_; // RNG seed. + + // True if this object has been fully initialized. + bool initialized_ GUARDED_BY(guarded_page_lock); + + // Flag to control whether we can return allocations or not. + bool allow_allocations_ GUARDED_BY(guarded_page_lock); + + // Set to true if a double free has occurred. + bool double_free_detected_; + + // Set to true if a write overflow was detected on deallocation. + bool write_overflow_detected_; + + friend struct ConstexprCheck; +}; + +struct ConstexprCheck { + static_assert(GuardedPageAllocator().rand_ || true, + "GuardedPageAllocator must have a constexpr constructor"); +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ diff --git a/tcmalloc/guarded_page_allocator_test.cc b/tcmalloc/guarded_page_allocator_test.cc new file mode 100644 index 000000000..1973f8cb2 --- /dev/null +++ b/tcmalloc/guarded_page_allocator_test.cc @@ -0,0 +1,137 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/guarded_page_allocator.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace { + +static constexpr size_t kMaxGpaPages = + tcmalloc::GuardedPageAllocator::kGpaMaxPages; + +// Size of pages used by GuardedPageAllocator. 
+static size_t PageSize() { + static const size_t page_size = + std::max(kPageSize, static_cast(getpagesize())); + return page_size; +} + +class GuardedPageAllocatorTest : public testing::Test { + protected: + GuardedPageAllocatorTest() { + absl::base_internal::SpinLockHolder h(&tcmalloc::pageheap_lock); + gpa_.Init(kMaxGpaPages, kMaxGpaPages); + gpa_.AllowAllocations(); + } + + explicit GuardedPageAllocatorTest(size_t num_pages) { + absl::base_internal::SpinLockHolder h(&tcmalloc::pageheap_lock); + gpa_.Init(num_pages, kMaxGpaPages); + gpa_.AllowAllocations(); + } + + ~GuardedPageAllocatorTest() override { gpa_.Destroy(); } + + tcmalloc::GuardedPageAllocator gpa_; +}; + +class GuardedPageAllocatorParamTest + : public GuardedPageAllocatorTest, + public testing::WithParamInterface { + protected: + GuardedPageAllocatorParamTest() : GuardedPageAllocatorTest(GetParam()) {} +}; + +TEST_F(GuardedPageAllocatorTest, SingleAllocDealloc) { + char *buf = reinterpret_cast(gpa_.Allocate(PageSize(), 0)); + EXPECT_NE(buf, nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(buf)); + memset(buf, 'A', PageSize()); + EXPECT_DEATH(buf[-1] = 'A', ""); + EXPECT_DEATH(buf[PageSize()] = 'A', ""); + gpa_.Deallocate(buf); + EXPECT_DEATH(buf[0] = 'B', ""); + EXPECT_DEATH(buf[PageSize() / 2] = 'B', ""); + EXPECT_DEATH(buf[PageSize() - 1] = 'B', ""); +} + +TEST_F(GuardedPageAllocatorTest, AllocDeallocAligned) { + for (size_t align = 1; align <= PageSize(); align <<= 1) { + constexpr size_t alloc_size = 1; + void *p = gpa_.Allocate(alloc_size, align); + EXPECT_NE(p, nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(p)); + EXPECT_EQ(reinterpret_cast(p) % align, 0); + } +} + +TEST_P(GuardedPageAllocatorParamTest, AllocDeallocAllPages) { + size_t num_pages = GetParam(); + char *bufs[kMaxGpaPages]; + for (size_t i = 0; i < num_pages; i++) { + bufs[i] = reinterpret_cast(gpa_.Allocate(1, 0)); + EXPECT_NE(bufs[i], nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(bufs[i])); + } + EXPECT_EQ(gpa_.Allocate(1, 0), nullptr); + gpa_.Deallocate(bufs[0]); + bufs[0] = reinterpret_cast(gpa_.Allocate(1, 0)); + EXPECT_NE(bufs[0], nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(bufs[0])); + for (size_t i = 0; i < num_pages; i++) { + bufs[i][0] = 'A'; + gpa_.Deallocate(bufs[i]); + } +} +INSTANTIATE_TEST_SUITE_P(VaryNumPages, GuardedPageAllocatorParamTest, + testing::Values(1, kMaxGpaPages / 2, kMaxGpaPages)); + +TEST_F(GuardedPageAllocatorTest, PointerIsMine) { + void *buf = gpa_.Allocate(1, 0); + int stack_var; + auto malloc_ptr = absl::make_unique(); + EXPECT_TRUE(gpa_.PointerIsMine(buf)); + EXPECT_FALSE(gpa_.PointerIsMine(&stack_var)); + EXPECT_FALSE(gpa_.PointerIsMine(malloc_ptr.get())); +} + +TEST_F(GuardedPageAllocatorTest, Print) { + char buf[1024] = {}; + TCMalloc_Printer out(buf, sizeof(buf)); + gpa_.Print(&out); + EXPECT_THAT(buf, testing::ContainsRegex("GWP-ASan Status")); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/heap_profiling_test.cc b/tcmalloc/heap_profiling_test.cc new file mode 100644 index 000000000..7035d9534 --- /dev/null +++ b/tcmalloc/heap_profiling_test.cc @@ -0,0 +1,91 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +#include "gtest/gtest.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace { + +int64_t ProfileSize(ProfileType type) { + int64_t total = 0; + + MallocExtension::SnapshotCurrent(type).Iterate( + [&](const Profile::Sample &e) { total += e.sum; }); + return total; +} + +TEST(HeapProfilingTest, PeakHeapTracking) { + int64_t start_peak_sz = ProfileSize(ProfileType::kPeakHeap); + + // make a large allocation to force a new peak heap sample + // (total live: 50MiB) + void *first = malloc(50 << 20); + int64_t peak_after_first = ProfileSize(ProfileType::kPeakHeap); + EXPECT_NEAR(peak_after_first, start_peak_sz + (50 << 20), 10 << 20); + + // a small allocation shouldn't increase the peak + // (total live: 54MiB) + void *second = malloc(4 << 20); + int64_t peak_after_second = ProfileSize(ProfileType::kPeakHeap); + EXPECT_EQ(peak_after_second, peak_after_first); + + // but a large one should + // (total live: 254MiB) + void *third = malloc(200 << 20); + int64_t peak_after_third = ProfileSize(ProfileType::kPeakHeap); + EXPECT_NEAR(peak_after_third, peak_after_second + (200 << 20), 10 << 20); + + // freeing everything shouldn't affect the peak + // (total live: 0MiB) + free(first); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + free(second); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + free(third); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + // going back up less than previous peak shouldn't affect the peak + // (total live: 200MiB) + void *fourth = malloc(100 << 20); + void *fifth = malloc(100 << 20); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + // passing the old peak significantly, even with many small allocations, + // should generate a new one + // (total live: 200MiB + 256MiB = 456MiB, 80% over the 254MiB peak) + void *bitsy[1 << 10]; + for (int i = 0; i < 1 << 10; i++) { + bitsy[i] = malloc(1 << 18); + } + EXPECT_GT(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + free(fourth); + free(fifth); + for (int i = 0; i < 1 << 10; i++) { + free(bitsy[i]); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/huge_address_map.cc b/tcmalloc/huge_address_map.cc new file mode 100644 index 000000000..193470ea1 --- /dev/null +++ b/tcmalloc/huge_address_map.cc @@ -0,0 +1,370 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
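// Illustrative, self-contained sketch (editorial addition, not part of the
// original file) of the parent-pointer in-order successor walk that
// HugeAddressMap::Node::next() below performs: if a right child exists, the
// successor is the leftmost node of the right subtree; otherwise climb until
// the current subtree is reached from a left child. The type and field names
// here are hypothetical.
struct SketchNode {
  SketchNode *left = nullptr;
  SketchNode *right = nullptr;
  SketchNode *parent = nullptr;
};

const SketchNode *Successor(const SketchNode *n) {
  if (n->right != nullptr) {
    const SketchNode *s = n->right;
    while (s->left != nullptr) s = s->left;
    return s;
  }
  const SketchNode *last = n;
  const SketchNode *p = n->parent;
  while (p != nullptr && p->left != last) {
    last = p;
    p = p->parent;
  }
  return p;  // nullptr when n holds the highest address in the tree.
}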
+ +#include "tcmalloc/huge_address_map.h" + +#include + +#include +#include + +#include "absl/base/internal/cycleclock.h" +#include "tcmalloc/internal/logging.h" + +// Implementations of functions. +namespace tcmalloc { + +const HugeAddressMap::Node *HugeAddressMap::Node::next() const { + const Node *n = right_; + if (n) { + while (n->left_) n = n->left_; + return n; + } + + n = parent_; + const Node *last = this; + while (n) { + if (n->left_ == last) return n; + last = n; + n = n->parent_; + } + + return nullptr; +} + +HugeAddressMap::Node *HugeAddressMap::Node::next() { + const Node *n = static_cast(this)->next(); + return const_cast(n); +} + +void HugeAddressMap::Node::Check(size_t *num_nodes, HugeLength *size) const { + HugeLength longest = range_.len(); + *num_nodes += 1; + *size += range_.len(); + + if (left_) { + // tree + CHECK_CONDITION(left_->range_.start() < range_.start()); + // disjoint + CHECK_CONDITION(left_->range_.end_addr() < range_.start_addr()); + // well-formed + CHECK_CONDITION(left_->parent_ == this); + // heap + CHECK_CONDITION(left_->prio_ <= prio_); + left_->Check(num_nodes, size); + if (left_->longest_ > longest) longest = left_->longest_; + } + + if (right_) { + // tree + CHECK_CONDITION(right_->range_.start() > range_.start()); + // disjoint + CHECK_CONDITION(right_->range_.start_addr() > range_.end_addr()); + // well-formed + CHECK_CONDITION(right_->parent_ == this); + // heap + CHECK_CONDITION(right_->prio_ <= prio_); + right_->Check(num_nodes, size); + if (right_->longest_ > longest) longest = right_->longest_; + } + + CHECK_CONDITION(longest_ == longest); +} + +const HugeAddressMap::Node *HugeAddressMap::first() const { + const Node *n = root(); + if (!n) return nullptr; + const Node *left = n->left_; + while (left) { + n = left; + left = n->left_; + } + + return n; +} + +HugeAddressMap::Node *HugeAddressMap::first() { + const Node *f = static_cast(this)->first(); + return const_cast(f); +} + +void HugeAddressMap::Check() { + size_t nodes = 0; + HugeLength size = NHugePages(0); + if (root_) { + CHECK_CONDITION(root_->parent_ == nullptr); + root_->Check(&nodes, &size); + } + CHECK_CONDITION(nodes == nranges()); + CHECK_CONDITION(size == total_mapped()); + CHECK_CONDITION(total_nodes_ == used_nodes_ + freelist_size_); +} + +size_t HugeAddressMap::nranges() const { return used_nodes_; } + +HugeLength HugeAddressMap::total_mapped() const { return total_size_; } + +void HugeAddressMap::Print(TCMalloc_Printer *out) const { + out->printf("HugeAddressMap: treap %zu / %zu nodes used / created\n", + used_nodes_, total_nodes_); + const size_t longest = root_ ? root_->longest_.raw_num() : 0; + out->printf("HugeAddressMap: %zu contiguous hugepages available\n", longest); +} + +void HugeAddressMap::PrintInPbtxt(PbtxtRegion *hpaa) const { + hpaa->PrintI64("num_huge_address_map_treap_nodes_used", used_nodes_); + hpaa->PrintI64("num_huge_address_map_treap_nodes_created", total_nodes_); + const size_t longest = root_ ? root_->longest_.in_bytes() : 0; + hpaa->PrintI64("contiguous_free_bytes", longest); +} + +HugeAddressMap::Node *HugeAddressMap::Predecessor(HugePage p) { + Node *n = root(); + Node *best = nullptr; + while (n) { + HugeRange here = n->range_; + if (here.contains(p)) return n; + if (p < here.start()) { + // p comes before here: + // our predecessor isn't here, nor in the right subtree. + n = n->left_; + } else { + // p comes after here: + // here is a valid candidate, and the right subtree might have better. 
+ best = n; + n = n->right_; + } + } + + return best; +} + +void HugeAddressMap::Merge(Node *b, HugeRange r, Node *a) { + auto merge_when = [](HugeRange x, int64_t x_when, HugeRange y, int64_t y_when) { + // avoid overflow with floating-point + const size_t x_len = x.len().raw_num(); + const size_t y_len = y.len().raw_num(); + const double x_weight = static_cast(x_len) * x_when; + const double y_weight = static_cast(y_len) * y_when; + return static_cast((x_weight + y_weight) / (x_len + y_len)); + }; + + int64_t when = absl::base_internal::CycleClock::Now(); + // Two way merges are easy. + if (a == nullptr) { + b->when_ = merge_when(b->range_, b->when(), r, when); + b->range_ = Join(b->range_, r); + FixLongest(b); + return; + } else if (b == nullptr) { + a->when_ = merge_when(r, when, a->range_, a->when()); + a->range_ = Join(r, a->range_); + FixLongest(a); + return; + } + + // Three way merge: slightly harder. We must remove one node + // (arbitrarily picking next). + HugeRange partial = Join(r, a->range_); + int64_t partial_when = merge_when(r, when, a->range_, a->when()); + HugeRange full = Join(b->range_, partial); + int64_t full_when = merge_when(b->range_, b->when(), partial, partial_when); + // Removing a will reduce total_size_ by that length, but since we're merging + // we actually don't change lengths at all; undo that. + total_size_ += a->range_.len(); + Remove(a); + b->range_ = full; + b->when_ = full_when; + FixLongest(b); +} + +void HugeAddressMap::Insert(HugeRange r) { + total_size_ += r.len(); + // First, try to merge if necessary. Note there are three possibilities: + // we might need to merge before with r, r with after, or all three together. + Node *before = Predecessor(r.start()); + CHECK_CONDITION(!before || !before->range_.intersects(r)); + Node *after = before ? before->next() : first(); + CHECK_CONDITION(!after || !after->range_.intersects(r)); + if (before && before->range_.precedes(r)) { + if (after && r.precedes(after->range_)) { + Merge(before, r, after); + } else { + Merge(before, r, nullptr); + } + return; + } else if (after && r.precedes(after->range_)) { + Merge(nullptr, r, after); + return; + } + CHECK_CONDITION(!before || !before->range_.precedes(r)); + CHECK_CONDITION(!after || !r.precedes(after->range_)); + // No merging possible; just add a new node. + Node *n = Get(r); + Node *curr = root(); + Node *parent = nullptr; + Node **link = &root_; + // Walk down the tree to our correct location + while (curr != nullptr && curr->prio_ >= n->prio_) { + curr->longest_ = std::max(curr->longest_, r.len()); + parent = curr; + if (curr->range_.start() < r.start()) { + link = &curr->right_; + curr = curr->right_; + } else { + link = &curr->left_; + curr = curr->left_; + } + } + *link = n; + n->parent_ = parent; + n->left_ = n->right_ = nullptr; + n->longest_ = r.len(); + if (curr) { + HugePage p = r.start(); + // We need to split the treap at curr into n's children. + // This will be two treaps: one less than p, one greater, and has + // a nice recursive structure. + Node **less = &n->left_; + Node *lp = n; + Node **more = &n->right_; + Node *mp = n; + while (curr) { + if (curr->range_.start() < p) { + *less = curr; + curr->parent_ = lp; + less = &curr->right_; + lp = curr; + curr = curr->right_; + } else { + *more = curr; + curr->parent_ = mp; + more = &curr->left_; + mp = curr; + curr = curr->left_; + } + } + *more = *less = nullptr; + // We ripped apart the tree along these two paths--fix longest pointers. 
+ FixLongest(lp); + FixLongest(mp); + } +} + +void HugeAddressMap::Node::FixLongest() { + const HugeLength l = left_ ? left_->longest_ : NHugePages(0); + const HugeLength r = right_ ? right_->longest_ : NHugePages(0); + const HugeLength c = range_.len(); + const HugeLength new_longest = std::max({l, r, c}); + longest_ = new_longest; +} + +void HugeAddressMap::FixLongest(HugeAddressMap::Node *n) { + while (n) { + n->FixLongest(); + n = n->parent_; + } +} + +void HugeAddressMap::Remove(HugeAddressMap::Node *n) { + total_size_ -= n->range_.len(); + // We need to merge the left and right children of n into one + // treap, then glue it into place wherever n was. + Node **link; + Node *parent = n->parent_; + Node *top = n->left_; + Node *bottom = n->right_; + + const HugeLength child_longest = + std::max(top ? top->longest_ : NHugePages(0), + bottom ? bottom->longest_ : NHugePages(0)); + if (!parent) { + link = &root_; + } else { + // Account for the removed child--might change longests. + // Easiest way: update this subtree to ignore the removed node, + // then fix the chain of parents. + n->longest_ = child_longest; + FixLongest(parent); + if (parent->range_.start() > n->range_.start()) { + link = &parent->left_; + } else { + link = &parent->right_; + } + } + + // A routine op we'll need a lot: given two (possibly null) + // children, put the root-ier one into top. + auto reorder_maybe = [](Node **top, Node **bottom) { + Node *b = *bottom, *t = *top; + if (b && (!t || t->prio_ < b->prio_)) { + *bottom = t; + *top = b; + } + }; + + reorder_maybe(&top, &bottom); + // if we have two treaps to merge (top is always non-null if bottom is) + // Invariant: top, bottom are two valid (longest included) + // treaps. parent (and all above/elsewhere) have the correct longest + // values, though parent does not have the correct children (will be the + // merged value of top and bottom.) + while (bottom) { + *link = top; + top->parent_ = parent; + // We're merging bottom into top, so top might contain a longer + // chunk than it thinks. + top->longest_ = std::max(top->longest_, bottom->longest_); + parent = top; + if (bottom->range_.start() < top->range_.start()) { + link = &top->left_; + top = top->left_; + } else { + link = &top->right_; + top = top->right_; + } + reorder_maybe(&top, &bottom); + } + *link = top; + if (top) top->parent_ = parent; + Put(n); +} + +void HugeAddressMap::Put(Node *n) { + freelist_size_++; + used_nodes_--; + n->left_ = freelist_; + freelist_ = n; +} + +HugeAddressMap::Node *HugeAddressMap::Get(HugeRange r) { + CHECK_CONDITION((freelist_ == nullptr) == (freelist_size_ == 0)); + used_nodes_++; + int prio = rand_r(&seed_); + if (freelist_size_ == 0) { + total_nodes_++; + Node *ret = reinterpret_cast(meta_(sizeof(Node))); + return new (ret) Node(r, prio); + } + + freelist_size_--; + Node *ret = freelist_; + freelist_ = ret->left_; + return new (ret) Node(r, prio); +} + +HugeAddressMap::Node::Node(HugeRange r, int prio) + : range_(r), prio_(prio), when_(absl::base_internal::CycleClock::Now()) {} + +} // namespace tcmalloc diff --git a/tcmalloc/huge_address_map.h b/tcmalloc/huge_address_map.h new file mode 100644 index 000000000..1d6660828 --- /dev/null +++ b/tcmalloc/huge_address_map.h @@ -0,0 +1,152 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_ADDRESS_MAP_H_ +#define TCMALLOC_HUGE_ADDRESS_MAP_H_ +#include +#include + +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +// Maintains a set of disjoint HugeRanges, merging adjacent ranges into one. +// Exposes a balanced (somehow) binary tree of free ranges on address, +// augmented with the largest range in each subtree (this allows fairly simple +// allocation algorithms from the contained ranges. +// +// This class scales well and is *reasonably* performant, but it is not intended +// for use on extremely hot paths. +// TODO(b/134688982): extend to support other range-like types? +class HugeAddressMap { + public: + typedef void *(*MetadataAllocFunction)(size_t bytes); + explicit constexpr HugeAddressMap(MetadataAllocFunction meta); + + // IMPORTANT: DESTROYING A HUGE ADDRESS MAP DOES NOT MAKE ANY ATTEMPT + // AT FREEING ALLOCATED METADATA. + ~HugeAddressMap() = default; + + class Node { + public: + // the range stored at this point + HugeRange range() const; + // Tree structure + Node *left(); + const Node *left() const; + Node *right(); + const Node *right() const; + // Iterate to the next node in address order + const Node *next() const; + Node *next(); + // when were this node's content added (in + // absl::base_internal::CycleClock::Now units)? + int64_t when() const; + + // What is the length of the longest range in the subtree rooted here? + HugeLength longest() const; + + private: + Node(HugeRange r, int prio); + friend class HugeAddressMap; + HugeRange range_; + int prio_; // chosen randomly + Node *left_, *right_; + Node *parent_; + HugeLength longest_; + int64_t when_; + // Expensive, recursive consistency check. + // Accumulates node count and range sizes into passed arguments. + void Check(size_t *num_nodes, HugeLength *size) const; + + // We've broken longest invariants somehow; fix them here. + void FixLongest(); + }; + + // Get root of the tree. + Node *root(); + const Node *root() const; + + // Get lowest-addressed node + const Node *first() const; + Node *first(); + + // Returns the highest-addressed range that does not lie completely + // after p (if any). + Node *Predecessor(HugePage p); + + // Expensive consistency check. + void Check(); + + // Statistics + size_t nranges() const; + HugeLength total_mapped() const; + void Print(TCMalloc_Printer *out) const; + void PrintInPbtxt(PbtxtRegion *hpaa) const; + + // Add to the map, merging with adjacent ranges as needed. + void Insert(HugeRange r); + + // Delete n from the map. + void Remove(Node *n); + + private: + // our tree + Node *root_{nullptr}; + size_t used_nodes_{0}; + HugeLength total_size_{NHugePages(0)}; + + // cache of unused nodes + Node *freelist_{nullptr}; + size_t freelist_size_{0}; + // How we get more + MetadataAllocFunction meta_; + Node *Get(HugeRange r); + void Put(Node *n); + + size_t total_nodes_{0}; + + void Merge(Node *b, HugeRange r, Node *a); + void FixLongest(Node *n); + // Note that we always use the same seed, currently; this isn't very random. 
+ // In practice we're not worried about adversarial input and this works well + // enough. + unsigned int seed_{0}; +}; + +inline constexpr HugeAddressMap::HugeAddressMap(MetadataAllocFunction meta) + : meta_(meta) {} + +inline HugeRange HugeAddressMap::Node::range() const { return range_; } +inline HugeAddressMap::Node *HugeAddressMap::Node::left() { return left_; } +inline const HugeAddressMap::Node *HugeAddressMap::Node::left() const { + return left_; +} +inline HugeAddressMap::Node *HugeAddressMap::Node::right() { return right_; } +inline const HugeAddressMap::Node *HugeAddressMap::Node::right() const { + return right_; +} + +inline int64_t HugeAddressMap::Node::when() const { return when_; } +inline HugeLength HugeAddressMap::Node::longest() const { return longest_; } + +inline HugeAddressMap::Node *HugeAddressMap::root() { return root_; } +inline const HugeAddressMap::Node *HugeAddressMap::root() const { + return root_; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_HUGE_ADDRESS_MAP_H_ diff --git a/tcmalloc/huge_address_map_test.cc b/tcmalloc/huge_address_map_test.cc new file mode 100644 index 000000000..405fbb5b9 --- /dev/null +++ b/tcmalloc/huge_address_map_test.cc @@ -0,0 +1,82 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_address_map.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace { + +class HugeAddressMapTest : public ::testing::Test { + protected: + HugeAddressMapTest() : map_(MallocMetadata) { metadata_allocs_.clear(); } + + ~HugeAddressMapTest() override { + for (void *p : metadata_allocs_) { + free(p); + } + } + + std::vector Contents() { + std::vector ret; + auto node = map_.first(); + while (node) { + ret.push_back(node->range()); + node = node->next(); + } + + return ret; + } + + HugePage hp(size_t i) { return {i}; } + HugeLength hl(size_t i) { return NHugePages(i); } + + HugeAddressMap map_; + + private: + static void *MallocMetadata(size_t size) { + void *ptr = malloc(size); + metadata_allocs_.push_back(ptr); + return ptr; + } + + static std::vector metadata_allocs_; +}; + +std::vector HugeAddressMapTest::metadata_allocs_; + +// This test verifies that HugeAddressMap merges properly. 
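// Editorial note (not part of the original file): the test below checks that
// Insert() coalesces adjacent ranges into a single node. When ranges are
// merged, Merge() in huge_address_map.cc also combines their timestamps as a
// length-weighted average (see merge_when there). As a worked example with
// hypothetical numbers: merging a 1-hugepage range whose "when" is 100 with a
// 3-hugepage range whose "when" is 200 gives (1*100 + 3*200) / (1 + 3) = 175
// for the combined node.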
+TEST_F(HugeAddressMapTest, Merging) { + const HugeRange r1 = HugeRange::Make(hp(0), hl(1)); + const HugeRange r2 = HugeRange::Make(hp(1), hl(1)); + const HugeRange r3 = HugeRange::Make(hp(2), hl(1)); + const HugeRange all = Join(r1, Join(r2, r3)); + map_.Insert(r1); + map_.Check(); + EXPECT_THAT(Contents(), testing::ElementsAre(r1)); + map_.Insert(r3); + map_.Check(); + EXPECT_THAT(Contents(), testing::ElementsAre(r1, r3)); + map_.Insert(r2); + map_.Check(); + EXPECT_THAT(Contents(), testing::ElementsAre(all)); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/huge_allocator.cc b/tcmalloc/huge_allocator.cc new file mode 100644 index 000000000..4b1317959 --- /dev/null +++ b/tcmalloc/huge_allocator.cc @@ -0,0 +1,172 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_allocator.h" + +#include + +#include "tcmalloc/huge_address_map.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +void HugeAllocator::Print(TCMalloc_Printer *out) { + out->printf("HugeAllocator: contiguous, unbacked hugepage(s)\n"); + free_.Print(out); + out->printf( + "HugeAllocator: %zu requested - %zu in use = %zu hugepages free\n", + from_system_.raw_num(), in_use_.raw_num(), + (from_system_ - in_use_).raw_num()); +} + +void HugeAllocator::PrintInPbtxt(PbtxtRegion *hpaa) const { + free_.PrintInPbtxt(hpaa); + hpaa->PrintI64("num_total_requested_huge_pages", from_system_.raw_num()); + hpaa->PrintI64("num_in_use_huge_pages", in_use_.raw_num()); +} + +HugeAddressMap::Node *HugeAllocator::Find(HugeLength n) { + HugeAddressMap::Node *curr = free_.root(); + // invariant: curr != nullptr && curr->longest >= n + // we favor smaller gaps and lower nodes and lower addresses, in that + // order. The net effect is that we are neither a best-fit nor a + // lowest-address allocator but vaguely close to both. + HugeAddressMap::Node *best = nullptr; + while (curr && curr->longest() >= n) { + if (curr->range().len() >= n) { + if (!best || best->range().len() > curr->range().len()) { + best = curr; + } + } + + // Either subtree could contain a better fit and we don't want to + // search the whole tree. Pick a reasonable child to look at. + auto left = curr->left(); + auto right = curr->right(); + if (!left || left->longest() < n) { + curr = right; + continue; + } + + if (!right || right->longest() < n) { + curr = left; + continue; + } + + // Here, we have a nontrivial choice. + if (left->range().len() == right->range().len()) { + if (left->longest() <= right->longest()) { + curr = left; + } else { + curr = right; + } + } else if (left->range().len() < right->range().len()) { + // Here, the longest range in both children is the same...look + // in the subtree with the smaller root, as that's slightly + // more likely to be our best. 
+ curr = left; + } else { + curr = right; + } + } + return best; +} + +void HugeAllocator::CheckFreelist() { + free_.Check(); + size_t num_nodes = free_.nranges(); + HugeLength n = free_.total_mapped(); + free_.Check(); + CHECK_CONDITION(n == from_system_ - in_use_); + LargeSpanStats large; + memset(&large, 0, sizeof(large)); + AddSpanStats(nullptr, &large, nullptr); + CHECK_CONDITION(num_nodes == large.spans); + CHECK_CONDITION(n.in_pages() == large.returned_pages); +} + +HugeRange HugeAllocator::AllocateRange(HugeLength n) { + if (n.overflows()) return HugeRange::Nil(); + size_t actual; + size_t bytes = n.in_bytes(); + size_t align = kHugePageSize; + void *ptr = allocate_(bytes, &actual, align); + if (ptr == nullptr) { + // OOM... + return HugeRange::Nil(); + } + CHECK_CONDITION(ptr != nullptr); + // It's possible for a request to return extra hugepages. + CHECK_CONDITION(actual % kHugePageSize == 0); + n = HLFromBytes(actual); + from_system_ += n; + return HugeRange::Make(HugePageContaining(ptr), n); +} + +HugeRange HugeAllocator::Get(HugeLength n) { + CHECK_CONDITION(n > NHugePages(0)); + auto *node = Find(n); + if (!node) { + // Get more memory, then "delete" it + HugeRange r = AllocateRange(n); + if (!r.valid()) return r; + in_use_ += r.len(); + Release(r); + node = Find(n); + CHECK_CONDITION(node != nullptr); + } + in_use_ += n; + + HugeRange r = node->range(); + free_.Remove(node); + if (r.len() > n) { + HugeLength before = r.len(); + HugeRange extra = HugeRange::Make(r.start() + n, before - n); + r = HugeRange::Make(r.start(), n); + ASSERT(r.precedes(extra)); + ASSERT(r.len() + extra.len() == before); + in_use_ += extra.len(); + Release(extra); + } else { + // Release does this for us + DebugCheckFreelist(); + } + + return r; +} + +void HugeAllocator::Release(HugeRange r) { + in_use_ -= r.len(); + + free_.Insert(r); + DebugCheckFreelist(); +} + +void HugeAllocator::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const { + for (const HugeAddressMap::Node *node = free_.first(); node != nullptr; + node = node->next()) { + HugeLength n = node->range().len(); + if (large != nullptr) { + large->spans++; + large->returned_pages += n.in_pages(); + } + + if (ages != nullptr) { + ages->RecordRange(n.in_pages(), true, node->when()); + } + } +} + +} // namespace tcmalloc diff --git a/tcmalloc/huge_allocator.h b/tcmalloc/huge_allocator.h new file mode 100644 index 000000000..7b4c36398 --- /dev/null +++ b/tcmalloc/huge_allocator.h @@ -0,0 +1,103 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Tracking information for the available range of hugepages, +// and a basic allocator for unmapped hugepages. 
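// Illustrative usage sketch (editorial addition, not part of the original
// header). The two allocation hooks named here are hypothetical stand-ins for
// the system and metadata allocators that tcmalloc wires in:
//
//   void *MySystemAlloc(size_t bytes, size_t *actual, size_t align);
//   void *MyMetadataAlloc(size_t bytes);
//
//   HugeAllocator allocator(MySystemAlloc, MyMetadataAlloc);
//   HugeRange r = allocator.Get(NHugePages(4));  // 4 contiguous, unbacked hugepages
//   if (r.valid()) {
//     // ... back the memory and hand it out ...
//     allocator.Release(r);  // make the range available to later Get() calls
//   }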
+#ifndef TCMALLOC_HUGE_ALLOCATOR_H_ +#define TCMALLOC_HUGE_ALLOCATOR_H_ + +#include + +#include "tcmalloc/common.h" +#include "tcmalloc/huge_address_map.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { +// these typedefs allow replacement of tcmalloc::System* for tests. +typedef void *(*MemoryAllocFunction)(size_t bytes, size_t *actual, + size_t align); +typedef void *(*MetadataAllocFunction)(size_t bytes); + +// This tracks available ranges of hugepages and fulfills requests for +// usable memory, allocating more from the system as needed. All +// hugepages are treated as (and assumed to be) unbacked. +class HugeAllocator { + public: + constexpr HugeAllocator(MemoryAllocFunction allocate, + MetadataAllocFunction meta_allocate) + : free_(meta_allocate), allocate_(allocate) {} + + // Obtain a range of n unbacked hugepages, distinct from all other + // calls to Get (other than those that have been Released.) + HugeRange Get(HugeLength n); + + // Returns a range of hugepages for reuse by subsequent Gets(). + // REQUIRES: is the return value (or a subrange thereof) of a previous + // call to Get(); neither nor any overlapping range has been released + // since that Get(). + void Release(HugeRange r); + + // Total memory requested from the system, whether in use or not, + HugeLength system() const { return from_system_; } + // Unused memory in the allocator. + HugeLength size() const { return from_system_ - in_use_; } + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + BackingStats stats() const { + BackingStats s; + s.system_bytes = system().in_bytes(); + s.free_bytes = 0; + s.unmapped_bytes = size().in_bytes(); + return s; + } + + void Print(TCMalloc_Printer *out); + void PrintInPbtxt(PbtxtRegion *hpaa) const; + + private: + // We're constrained in several ways by existing code. Hard requirements: + // * no radix tree or similar O(address space) external space tracking + // * support sub releasing + // * low metadata overhead + // * no pre-allocation. + // * reasonable space overhead + // + // We use a treap ordered on addresses to track. This isn't the most + // efficient thing ever but we're about to hit 100usec+/hugepage + // backing costs if we've gotten this far; the last few bits of performance + // don't matter, and most of the simple ideas can't hit all of the above + // requirements. + HugeAddressMap free_; + HugeAddressMap::Node *Find(HugeLength n); + + void CheckFreelist(); + void DebugCheckFreelist() { +#ifndef NDEBUG + CheckFreelist(); +#endif + } + + HugeLength from_system_{NHugePages(0)}; + HugeLength in_use_{NHugePages(0)}; + + MemoryAllocFunction allocate_; + HugeRange AllocateRange(HugeLength n); +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_HUGE_ALLOCATOR_H_ diff --git a/tcmalloc/huge_allocator_test.cc b/tcmalloc/huge_allocator_test.cc new file mode 100644 index 000000000..a9221a38e --- /dev/null +++ b/tcmalloc/huge_allocator_test.cc @@ -0,0 +1,450 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_allocator.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/random/random.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace { + +class HugeAllocatorTest : public testing::TestWithParam { + private: + // Use a tiny fraction of actual size so we can test aggressively. + static void *AllocateFake(size_t bytes, size_t *actual, size_t align); + + static constexpr size_t kMaxBacking = 1024 * 1024; + // This isn't super good form but we'll never have more than one HAT + // extant at once. + static std::vector backing_; + + // We use actual malloc for metadata allocations, but we track them so they + // can be deleted. + static void *MallocMetadata(size_t size); + static std::vector metadata_allocs_; + static size_t metadata_bytes_; + static bool should_overallocate_; + static HugeLength huge_pages_requested_; + static HugeLength huge_pages_received_; + + protected: + HugeLength HugePagesRequested() { return huge_pages_requested_; } + HugeLength HugePagesReceived() { return huge_pages_received_; } + + size_t MetadataBytes() { return metadata_bytes_; } + + HugeAllocatorTest() { + should_overallocate_ = GetParam(); + huge_pages_requested_ = NHugePages(0); + huge_pages_received_ = NHugePages(0); + // We don't use the first few bytes, because things might get weird + // given zero pointers. + backing_.resize(1024); + metadata_bytes_ = 0; + } + + ~HugeAllocatorTest() override { + for (void *p : metadata_allocs_) { + free(p); + } + metadata_allocs_.clear(); + backing_.clear(); + } + + size_t *GetActual(HugePage p) { return &backing_[p.index()]; } + + // We're dealing with a lot of memory, so we don't want to do full memset + // and then check every byte for corruption. So set the first and last + // byte in each page... + void CheckPages(HugeRange r, size_t c) { + for (HugePage p = r.first; p < r.first + r.n; ++p) { + EXPECT_EQ(c, *GetActual(p)); + } + } + + void MarkPages(HugeRange r, size_t c) { + for (HugePage p = r.first; p < r.first + r.n; ++p) { + *GetActual(p) = c; + } + } + + void CheckStats(HugeLength expected_use) { + const HugeLength received = HugePagesReceived(); + EXPECT_EQ(received, allocator_.system()); + HugeLength used = received - allocator_.size(); + EXPECT_EQ(used, expected_use); + } + + HugeAllocator allocator_{AllocateFake, MallocMetadata}; +}; + +// Use a tiny fraction of actual size so we can test aggressively. +void *HugeAllocatorTest::AllocateFake(size_t bytes, size_t *actual, + size_t align) { + CHECK_CONDITION(bytes % kHugePageSize == 0); + CHECK_CONDITION(align % kHugePageSize == 0); + HugeLength req = HLFromBytes(bytes); + huge_pages_requested_ += req; + // Test the case where our sys allocator provides too much. + if (should_overallocate_) ++req; + huge_pages_received_ += req; + *actual = req.in_bytes(); + // we'll actually provide hidden backing, one word per hugepage. 
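  // Note (illustrative, not part of this change): from here on `bytes` and
  // `align` are reused in hugepage units; `bytes` becomes the number of
  // hugepages, which is also the number of backing words needed.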
+ bytes = req / NHugePages(1); + align /= kHugePageSize; + size_t index = backing_.size(); + if (index % align != 0) { + index += (align - (index & align)); + } + if (index + bytes > kMaxBacking) return nullptr; + backing_.resize(index + bytes); + void *ptr = reinterpret_cast(index * kHugePageSize); + return ptr; +} + +// We use actual malloc for metadata allocations, but we track them so they +// can be deleted. +void *HugeAllocatorTest::MallocMetadata(size_t size) { + metadata_bytes_ += size; + void *ptr = malloc(size); + metadata_allocs_.push_back(ptr); + return ptr; +} + +std::vector HugeAllocatorTest::backing_; +std::vector HugeAllocatorTest::metadata_allocs_; +size_t HugeAllocatorTest::metadata_bytes_; +bool HugeAllocatorTest::should_overallocate_; +HugeLength HugeAllocatorTest::huge_pages_requested_; +HugeLength HugeAllocatorTest::huge_pages_received_; + +TEST_P(HugeAllocatorTest, Basic) { + std::vector> allocs; + absl::BitGen rng; + size_t label = 0; + HugeLength total = NHugePages(0); + static const size_t kSize = 1000; + HugeLength peak = total; + for (int i = 0; i < kSize; ++i) { + HugeLength len = + NHugePages(absl::LogUniform(rng, 0, (1 << 12) - 1) + 1); + auto r = allocator_.Get(len); + ASSERT_TRUE(r.valid()); + total += len; + peak = std::max(peak, total); + CheckStats(total); + MarkPages(r, label); + allocs.push_back({r, label}); + label++; + } + + for (int i = 0; i < 1000 * 25; ++i) { + size_t index = absl::Uniform(rng, 0, kSize); + std::swap(allocs[index], allocs[kSize - 1]); + auto p = allocs[kSize - 1]; + CheckPages(p.first, p.second); + total -= p.first.len(); + allocator_.Release(p.first); + CheckStats(total); + + HugeLength len = + NHugePages(absl::LogUniform(rng, 0, (1 << 12) - 1) + 1); + auto r = allocator_.Get(len); + ASSERT_TRUE(r.valid()); + ASSERT_EQ(r.len(), len); + total += len; + peak = std::max(peak, total); + CheckStats(total); + MarkPages(r, label); + allocs[kSize - 1] = {r, label}; + label++; + } + for (auto p : allocs) { + CheckPages(p.first, p.second); + allocator_.Release(p.first); + } +} + +// Check that releasing small chunks of allocations works OK. +TEST_P(HugeAllocatorTest, Subrelease) { + size_t label = 1; + const HugeLength kLen = NHugePages(8); + const HugeLength kTotal = kLen * (kLen / NHugePages(1) - 1); + for (int i = 0; i < 100; ++i) { + std::vector> allocs; + // get allocs of kLen and release different sized sub-chunks of them - + // make sure that doesn't break anything else. + for (HugeLength j = NHugePages(1); j < kLen; ++j) { + auto r = allocator_.Get(kLen); + ASSERT_TRUE(r.valid()); + MarkPages(r, label); + allocator_.Release({r.start(), j}); + allocs.push_back({{r.start() + j, kLen - j}, label}); + label++; + } + EXPECT_EQ(kTotal, HugePagesRequested()); + for (auto p : allocs) { + CheckPages(p.first, p.second); + allocator_.Release(p.first); + } + } +} + +// Does subreleasing work OK for absurdly large allocations? 
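// Illustrative aside (not part of this change): the subrelease tests above
// and below give back an arbitrary prefix of a range they obtained and keep
// the rest. At the arithmetic level that is just a prefix split; a tiny
// self-contained sketch with invented names:

#include <cassert>
#include <cstddef>

struct ToyRange { size_t start, len; };  // lengths in hugepages

// Split off the first j hugepages of r, as allocator_.Release({r.start(), j})
// does in these tests; the caller keeps the disjoint remainder.
ToyRange ReleasePrefix(ToyRange r, size_t j, ToyRange *released) {
  assert(j <= r.len);
  *released = {r.start, j};
  return {r.start + j, r.len - j};
}

int main() {
  ToyRange released{0, 0};
  ToyRange kept = ReleasePrefix({100, 8}, 3, &released);
  assert(released.start == 100 && released.len == 3);
  assert(kept.start == 103 && kept.len == 5);  // contiguous, non-overlapping
}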
+TEST_P(HugeAllocatorTest, SubreleaseLarge) { + absl::BitGen rng; + std::vector> allocs; + size_t label = 1; + const HugeLength kLimit = HLFromBytes(1024ul * 1024 * 1024 * 1024); + for (HugeLength n = NHugePages(2); n < kLimit; n *= 2) { + auto r = allocator_.Get(n); + ASSERT_TRUE(r.valid()); + MarkPages(r, label); + // chunk of less than half + HugeLength chunk = + NHugePages(absl::Uniform(rng, 0, n / NHugePages(2)) + 1); + allocator_.Release({r.start(), chunk}); + allocs.push_back({{r.start() + chunk, n - chunk}, label}); + label++; + } + // reuse the released space + const HugeLength total = HugePagesRequested(); + while (total == HugePagesRequested()) { + HugeLength n = + NHugePages(absl::LogUniform(rng, 0, (1 << 8) - 1) + 1); + auto r = allocator_.Get(n); + ASSERT_TRUE(r.valid()); + MarkPages(r, label); + allocs.push_back({r, label}); + label++; + } + for (auto p : allocs) { + CheckPages(p.first, p.second); + allocator_.Release(p.first); + } +} + +// We don't care *that* much about vaddress space, but let's not be crazy. +// Don't fill tiny requests from big spaces. +TEST_P(HugeAllocatorTest, Fragmentation) { + // Prime the pump with some random allocations. + absl::BitGen rng; + + std::vector free; + constexpr int kSlots = 50; + + // Plan to insert a large allocation at the big_slot'th index, then free it + // during the initial priming step (so we have at least a contiguous region of + // at least big hugepages). + HugeLength big = NHugePages(8); + const int big_slot = absl::Uniform(rng, 0, kSlots); + + for (int i = 0; i < kSlots; ++i) { + if (i == big_slot) { + auto r = allocator_.Get(big); + ASSERT_TRUE(r.valid()); + free.push_back(r); + } + + auto r = allocator_.Get(NHugePages(1)); + ASSERT_TRUE(r.valid()); + if (absl::Bernoulli(rng, 1.0 / 2)) { + free.push_back(r); + } + } + size_t slots = free.size() - 1; + for (auto r : free) { + allocator_.Release(r); + } + free.clear(); + static const size_t kReps = 5; + for (int i = 0; i < kReps; ++i) { + SCOPED_TRACE(i); + + // Ensure we have a range of this size. + HugeRange r = allocator_.Get(big); + ASSERT_TRUE(r.valid()); + if (NHugePages(slots) > allocator_.size()) { + // We should also have slots pages left over after allocating big + for (int i = 0; i < slots; ++i) { + HugeRange f = allocator_.Get(NHugePages(1)); + ASSERT_TRUE(f.valid()); + free.push_back(f); + } + for (auto f : free) { + allocator_.Release(f); + } + free.clear(); + } + allocator_.Release(r); + // We should definitely have at least this many small spaces... + for (int i = 0; i < slots; ++i) { + r = allocator_.Get(NHugePages(1)); + ASSERT_TRUE(r.valid()); + free.push_back(r); + } + // that don't interfere with the available big space. + auto before = allocator_.system(); + r = allocator_.Get(big); + ASSERT_TRUE(r.valid()); + EXPECT_EQ(before, allocator_.system()); + allocator_.Release(r); + for (auto r : free) { + allocator_.Release(r); + } + free.clear(); + slots += big.raw_num(); + big += big; + } +} + +// Check that we only request as much as we actually need from the system. 
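// Illustrative aside (not part of this change): the Frugal test below asserts
// that, with nothing ever released, the hugepages requested from the system
// track the sum of all Gets exactly. A toy model of that invariant, with
// invented names:

#include <cassert>
#include <cstddef>

struct ToyCounter {
  size_t requested = 0;  // hugepages obtained from the system
  void Get(size_t n) { requested += n; }
};

int main() {
  ToyCounter c;
  size_t total = 0;
  for (size_t i = 1; i < 1000; ++i) {
    c.Get(i);
    total += i;
    assert(c.requested == total);  // no slack, no over-allocation
  }
  assert(total == 999 * 1000 / 2);  // 499500 hugepages in this toy run
}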
+TEST_P(HugeAllocatorTest, Frugal) { + HugeLength total = NHugePages(0); + static const size_t kSize = 1000; + for (int i = 1; i < kSize; ++i) { + HugeLength len = NHugePages(i); + // toss the range, we ain't using it + ASSERT_TRUE(allocator_.Get(len).valid()); + + total += len; + CheckStats(total); + EXPECT_EQ(total, HugePagesRequested()); + } +} + +TEST_P(HugeAllocatorTest, Stats) { + struct Helper { + static void Stats(const HugeAllocator *huge, size_t *num_spans, + size_t *pages, absl::Duration *avg_age) { + SmallSpanStats small; + LargeSpanStats large; + memset(&small, 0, sizeof(small)); + memset(&large, 0, sizeof(large)); + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + huge->AddSpanStats(&small, &large, &ages); + for (int i = 0; i < kMaxPages; ++i) { + EXPECT_EQ(0, small.normal_length[0]); + EXPECT_EQ(0, small.returned_length[0]); + } + *num_spans = large.spans; + EXPECT_EQ(0, large.normal_pages); + *pages = large.returned_pages; + const PageAgeHistograms::Histogram *hist = ages.GetTotalHistogram(true); + *avg_age = absl::Seconds(hist->avg_age()); + } + }; + + if (GetParam()) { + // Ensure overallocation doesn't skew our measurements below. + allocator_.Release(allocator_.Get(NHugePages(7))); + } + const HugeRange r = allocator_.Get(NHugePages(8)); + ASSERT_TRUE(r.valid()); + const HugePage p = r.start(); + // Break it into 3 ranges, separated by one-page regions, + // so we can easily track the internal state in stats. + const HugeRange r1 = {p, NHugePages(1)}; + const HugeRange b1 = {p + NHugePages(1), NHugePages(1)}; + const HugeRange r2 = {p + NHugePages(2), NHugePages(2)}; + const HugeRange b2 = {p + NHugePages(4), NHugePages(1)}; + const HugeRange r3 = {p + NHugePages(5), NHugePages(3)}; + + size_t num_spans, pages; + absl::Duration avg_age; + + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(0, num_spans); + EXPECT_EQ(0, pages); + EXPECT_EQ(absl::ZeroDuration(), avg_age); + + allocator_.Release(r1); + constexpr absl::Duration kDelay = absl::Milliseconds(500); + absl::SleepFor(kDelay); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(1, num_spans); + EXPECT_EQ(NHugePages(1).in_pages(), pages); + // We can only do >= testing, because we might be arbitrarily delayed. + // Since avg_age is computed in floating point, we may have round-off from + // TCMalloc's internal use of absl::base_internal::CycleClock down through + // computing the average age of the spans. kEpsilon allows for a tiny amount + // of slop. 
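  // Note (illustrative, not part of this change): the expected averages below
  // weight each span by its size. For example, after releasing r1 and r2, the
  // 1-hugepage span has aged about 0.75s and the 2-hugepage span about 0.25s,
  // giving (0.75 * 1 + 0.25 * 2) / (1 + 2), roughly 0.417s, as the lower bound.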
+ constexpr absl::Duration kEpsilon = absl::Microseconds(200); + EXPECT_LE(kDelay - kEpsilon, avg_age); + + allocator_.Release(r2); + absl::SleepFor(absl::Milliseconds(250)); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(2, num_spans); + EXPECT_EQ(NHugePages(3).in_pages(), pages); + EXPECT_LE( + (absl::Seconds(0.75) * 1 + absl::Seconds(0.25) * 2) / (1 + 2) - kEpsilon, + avg_age); + + allocator_.Release(r3); + absl::SleepFor(absl::Milliseconds(125)); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(3, num_spans); + EXPECT_EQ(NHugePages(6).in_pages(), pages); + EXPECT_LE((absl::Seconds(0.875) * 1 + absl::Seconds(0.375) * 2 + + absl::Seconds(0.125) * 3) / + (1 + 2 + 3) - + kEpsilon, + avg_age); + + allocator_.Release(b1); + allocator_.Release(b2); + absl::SleepFor(absl::Milliseconds(100)); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(1, num_spans); + EXPECT_EQ(NHugePages(8).in_pages(), pages); + EXPECT_LE((absl::Seconds(0.975) * 1 + absl::Seconds(0.475) * 2 + + absl::Seconds(0.225) * 3 + absl::Seconds(0.1) * 2) / + (1 + 2 + 3 + 2) - + kEpsilon, + avg_age); +} + +// Make sure we're well-behaved in the presence of OOM (and that we do +// OOM at some point...) +TEST_P(HugeAllocatorTest, OOM) { + HugeLength n = NHugePages(1); + while (allocator_.Get(n).valid()) { + n *= 2; + } +} + +INSTANTIATE_TEST_SUITE_P( + NormalOverAlloc, HugeAllocatorTest, testing::Values(false, true), + +[](const testing::TestParamInfo &info) { + return info.param ? "overallocates" : "normal"; + }); + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/huge_cache.cc b/tcmalloc/huge_cache.cc new file mode 100644 index 000000000..312e097e8 --- /dev/null +++ b/tcmalloc/huge_cache.cc @@ -0,0 +1,595 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_cache.h" + +#include + +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_address_map.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +// Erases values from the window that are out of date; sets i to the +// current location in the ringbuffer. +template +void MinMaxTracker::UpdateClock() { + const size_t epoch = clock_() / ToInt64Nanoseconds(kEpochLength); + // How many time steps did we take? (Since we only record kEpochs + // time steps, we can pretend it was at most that.) + size_t delta = epoch - last_epoch_; + delta = std::min(delta, kEpochs); + last_epoch_ = epoch; + + // At each tick, we move our current location by one, to a new location + // that contains too-old data (which must be zeroed.) 
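  // Note (illustrative, not part of this change): for example, with a 1ms
  // epoch and an 8-slot window, a clock jump of 20 epochs clamps delta to 8,
  // so the loop below wipes every slot because the whole window is stale; a
  // jump of 3 wipes only the 3 slots now entering the window.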
+ for (size_t offset = 0; offset < delta; ++offset) { + i_++; + if (i_ == kEpochs) i_ = 0; + window_[i_] = Extrema::Nil(); + } +} + +template +void MinMaxTracker::Report(HugeLength val) { + UpdateClock(); + window_[i_].Report(val); +} + +template +HugeLength MinMaxTracker::MaxOverTime(absl::Duration t) const { + size_t j = i_; + size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength)); + ASSERT(num_epochs <= kEpochs); + HugeLength m = NHugePages(0); + for (size_t offset = 0; offset < num_epochs; ++offset) { + m = std::max(m, window_[j].max); + if (j == 0) j = kEpochs; + --j; + } + + return m; +} + +template +HugeLength MinMaxTracker::MinOverTime(absl::Duration t) const { + size_t j = i_; + size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength)); + ASSERT(num_epochs <= kEpochs); + HugeLength m = kMaxVal; + for (size_t offset = 0; offset < num_epochs; ++offset) { + m = std::min(m, window_[j].min); + if (j == 0) j = kEpochs; + --j; + } + + // We only move epochs when we report values, which shouldn't be at + // the boundary value, so we should see something normal here. + ASSERT(m < kMaxVal); + return m; +} + +template +void MinMaxTracker::Print(TCMalloc_Printer *out) const { + // Prints timestamp:min_pages:max_pages for each window with records. + // Timestamp == kEpochs - 1 is the most recent measurement. + const long long millis = absl::ToInt64Milliseconds(kEpochLength); + out->printf("\nHugeCache: window %lldms * %zu", millis, kEpochs); + int written = 0; + size_t j = i_ + 1; + if (j == kEpochs) j = 0; + for (int offset = 0; offset < kEpochs; offset++) { + if (window_[j] != Extrema::Nil()) { + if (written % 100 == 0) out->printf("\nHugeCache: Usage timeseries "); + out->printf("%d:%zu:%zd,", offset, window_[j].min.raw_num(), + window_[j].max.raw_num()); + written++; + } + j++; + if (j == kEpochs) j = 0; + } + out->printf("\n"); +} + +template +void MinMaxTracker::PrintInPbtxt(PbtxtRegion *hpaa) const { + // Prints timestamp:min_pages:max_pages for each window with records. + // Timestamp == kEpochs - 1 is the most recent measurement. 
+ auto huge_cache_history = hpaa->CreateSubRegion("huge_cache_history"); + huge_cache_history.PrintI64("window_ms", + absl::ToInt64Milliseconds(kEpochLength)); + huge_cache_history.PrintI64("epochs", kEpochs); + + int written = 0; + size_t j = i_ + 1; + if (j == kEpochs) j = 0; + for (int offset = 0; offset < kEpochs; offset++) { + if (window_[j] != Extrema::Nil()) { + auto m = huge_cache_history.CreateSubRegion("measurements"); + m.PrintI64("epoch", offset); + m.PrintI64("min_bytes", window_[j].min.in_bytes()); + m.PrintI64("max_bytes", window_[j].max.in_bytes()); + written++; + } + j++; + if (j == kEpochs) j = 0; + } +} + +template +bool MinMaxTracker::Extrema::operator==(const Extrema &other) const { + return (other.max == max) && (other.min == min); +} + +template +bool MinMaxTracker::Extrema::operator!=(const Extrema &other) const { + return !(this->operator==(other)); +} + +// Explicit instantiations of template +template class MinMaxTracker<>; +template class MinMaxTracker<600>; + +void MovingAverageTracker::Report(HugeLength val) { + int64_t now = clock_(); + if (rolling_max_average_ < 1 || val >= HugeLength(rolling_max_average_ - 1)) { + rolling_max_average_ = val.raw_num(); + last_update_ = now; + last_val_ = val; + return; + } + absl::Duration delta = absl::Nanoseconds(now - last_update_); + if (delta < kResolution) { + last_max_ = std::max(last_max_, val); + } else if (delta < kTimeConstant) { + while (delta > kResolution) { + rolling_max_average_ = + (static_cast(2 * last_max_.raw_num()) + + rolling_max_average_ * (res_per_time_constant_ - 1)) / + (res_per_time_constant_ + 1); + delta -= kResolution; + last_update_ += absl::ToInt64Nanoseconds(kResolution); + } + last_max_ = std::max(last_val_, val); + } else { + // Old data is too old + rolling_max_average_ = std::max(last_val_, val).raw_num(); + last_update_ = now; + } + last_val_ = val; +} + +HugeLength MovingAverageTracker::RollingMaxAverage() const { + return NHugePages(rolling_max_average_); +} + +// The logic for actually allocating from the cache or backing, and keeping +// the hit rates specified. +HugeRange HugeCache::DoGet(HugeLength n, bool *from_released) { + auto *node = Find(n); + if (!node) { + misses_++; + weighted_misses_ += n.raw_num(); + HugeRange res = allocator_->Get(n); + if (res.valid()) { + *from_released = true; + } + + return res; + } + hits_++; + weighted_hits_ += n.raw_num(); + *from_released = false; + size_ -= n; + UpdateSize(size()); + HugeRange result, leftover; + // Put back whatever we have left (or nothing, if it's exact.) + std::tie(result, leftover) = Split(node->range(), n); + cache_.Remove(node); + if (leftover.valid()) { + cache_.Insert(leftover); + } + return result; +} + +void HugeCache::MaybeGrowCacheLimit(HugeLength missed) { + // Our goal is to make the cache size = the largest "brief dip." + // + // A "dip" being a case where usage shrinks, then increases back up + // to previous levels (at least partially). + // + // "brief" is "returns to normal usage in < kCacheTime." (In + // other words, we ideally want to be willing to cache memory for + // kCacheTime before expecting it to be used again--we are loose + // on the timing..) + // + // The interesting part is finding those dips. + + // This is the downward slope: we lost some usage. (This in theory could + // be as much as 2 * kCacheTime old, which is fine.) + const HugeLength shrink = off_peak_tracker_.MaxOverTime(kCacheTime); + + // This is the upward slope: we are coming back up. 
+ const HugeLength grow = usage_ - usage_tracker_.MinOverTime(kCacheTime); + + // Ideally we now know that we dipped down by some amount, then came + // up. Sadly our stats aren't quite good enough to guarantee things + // happened in the proper order. Suppose our usage takes the + // following path (in essentially zero time): + // 0, 10000, 5000, 5500. + // + // Clearly the proven dip here is 500. But we'll compute shrink = 5000, + // grow = 5500--we'd prefer to measure from a min *after* that shrink. + // + // It's difficult to ensure this, and hopefully this case is rare. + // TODO(b/134690209): figure out if we can solve that problem. + const HugeLength dip = std::min(shrink, grow); + + // Fragmentation: we may need to cache a little more than the actual + // usage jump. 10% seems to be a reasonable addition that doesn't waste + // much space, but gets good performance on tests. + const HugeLength slack = dip / 10; + + const HugeLength lim = dip + slack; + + if (lim > limit()) { + last_limit_change_ = clock_(); + limit_ = lim; + } +} + +void HugeCache::IncUsage(HugeLength n) { + usage_ += n; + usage_tracker_.Report(usage_); + moving_limit_tracker_.Report(usage_); + detailed_tracker_.Report(usage_); + off_peak_tracker_.Report(NHugePages(0)); + if (use_moving_average_) limit_ = moving_limit_tracker_.RollingMaxAverage(); + if (size() + usage() > max_rss_) max_rss_ = size() + usage(); +} + +void HugeCache::DecUsage(HugeLength n) { + usage_ -= n; + usage_tracker_.Report(usage_); + moving_limit_tracker_.Report(usage_); + detailed_tracker_.Report(usage_); + const HugeLength max = usage_tracker_.MaxOverTime(kCacheTime); + ASSERT(max >= usage_); + const HugeLength off_peak = max - usage_; + off_peak_tracker_.Report(off_peak); + if (use_moving_average_) limit_ = moving_limit_tracker_.RollingMaxAverage(); + if (size() + usage() > max_rss_) max_rss_ = size() + usage(); +} + +void HugeCache::UpdateSize(HugeLength size) { + size_tracker_.Report(size); + if (size > max_size_) max_size_ = size; + if (size + usage() > max_rss_) max_rss_ = size + usage(); + + // TODO(b/134691947): moving this inside the MinMaxTracker would save one call + // to clock_() but all MinMaxTrackers would track regret instead. + int64_t now = clock_(); + if (now > last_regret_update_) { + regret_ += size.raw_num() * (now - last_regret_update_); + last_regret_update_ = now; + } +} + +HugeRange HugeCache::Get(HugeLength n, bool *from_released) { + HugeRange r = DoGet(n, from_released); + // failure to get a range should "never" "never" happen (VSS limits + // or wildly incorrect allocation sizes only...) Don't deal with + // this case for cache size accounting. + IncUsage(r.len()); + + if (!use_moving_average_) { + const bool miss = r.valid() && *from_released; + if (miss) MaybeGrowCacheLimit(n); + } + return r; +} + +void HugeCache::Release(HugeRange r) { + DecUsage(r.len()); + + cache_.Insert(r); + size_ += r.len(); + if (use_moving_average_ ? size_ + usage_ <= limit() : size_ <= limit()) { + fills_++; + } else { + overflows_++; + } + + // Shrink the limit, if we're going to do it, before we shrink to + // the max size. (This could reduce the number of regions we break + // in half to avoid overshrinking.) 
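  // Note (illustrative, not part of this change): for example, with 105
  // hugepages cached, dropping the limit from 100 to 80 first and then
  // shrinking once to 80 splits at most one free range, whereas shrinking to
  // 100 and then again to 80 could split two.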
+ if (use_moving_average_) { + if (limit() > usage()) { + total_fast_unbacked_ += ShrinkCache(limit() - usage()); + } + } else { + if (absl::Nanoseconds(clock_() - last_limit_change_) > (kCacheTime * 2)) { + total_fast_unbacked_ += MaybeShrinkCacheLimit(); + } + total_fast_unbacked_ += ShrinkCache(limit()); + } + + UpdateSize(size()); +} + +void HugeCache::ReleaseUnbacked(HugeRange r) { + DecUsage(r.len()); + // No point in trying to cache it, just hand it back. + allocator_->Release(r); +} + +HugeLength HugeCache::MaybeShrinkCacheLimit() { + last_limit_change_ = clock_(); + + const HugeLength min = size_tracker_.MinOverTime(kCacheTime * 2); + // If cache size has gotten down to at most 20% of max, we assume + // we're close enough to the optimal size--we don't want to fiddle + // too much/too often unless we have large gaps in usage. + if (min < limit() / 5) return NHugePages(0); + + // Take away half of the unused portion. + HugeLength drop = std::max(min / 2, NHugePages(1)); + limit_ = std::max(limit() <= drop ? NHugePages(0) : limit() - drop, + MinCacheLimit()); + return ShrinkCache(limit()); +} + +HugeLength HugeCache::ShrinkCache(HugeLength target) { + HugeLength removed = NHugePages(0); + while (size_ > target) { + if (respect_mincache_limit_ && size_ <= MinCacheLimit()) break; + // Remove smallest-ish nodes, to avoid fragmentation where possible. + auto *node = Find(NHugePages(1)); + CHECK_CONDITION(node); + HugeRange r = node->range(); + cache_.Remove(node); + // Suppose we're 10 MiB over target but the smallest available node + // is 100 MiB. Don't go overboard--split up the range. + // In particular - this prevents disastrous results if we've decided + // the cache should be 99 MiB but the actual hot usage is 100 MiB + // (and it is unfragmented). + const HugeLength delta = size() - target; + if (r.len() > delta) { + HugeRange to_remove, leftover; + std::tie(to_remove, leftover) = Split(r, delta); + ASSERT(leftover.valid()); + cache_.Insert(leftover); + r = to_remove; + } + + size_ -= r.len(); + // Note, actual unback implementation is temporarily dropping and + // re-acquiring the page heap lock here. + unback_(r.start_addr(), r.byte_len()); + allocator_->Release(r); + removed += r.len(); + } + + return removed; +} + +HugeLength HugeCache::ReleaseCachedPages(HugeLength n) { + // This is a good time to check: is our cache going persistently unused? + HugeLength released = NHugePages(0); + if (use_moving_average_) { + usage_tracker_.Report(usage_); + moving_limit_tracker_.Report(usage_); + detailed_tracker_.Report(usage_); + limit_ = moving_limit_tracker_.RollingMaxAverage(); + if (limit() >= usage_) released = ShrinkCache(limit() - usage_); + } else { + released = MaybeShrinkCacheLimit(); + } + + if (!ignore_oncepersec_release_ && released < n) { + n -= released; + const HugeLength target = n > size() ? 
NHugePages(0) : size() - n; + released += ShrinkCache(target); + } + + UpdateSize(size()); + total_periodic_unbacked_ += released; + return released; +} + +void HugeCache::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const { + CHECK_CONDITION(kPagesPerHugePage >= kMaxPages); + for (const HugeAddressMap::Node *node = cache_.first(); node != nullptr; + node = node->next()) { + HugeLength n = node->range().len(); + if (large != nullptr) { + large->spans++; + large->normal_pages += n.in_pages(); + } + + if (ages != nullptr) { + ages->RecordRange(n.in_pages(), false, node->when()); + } + } +} + +HugeAddressMap::Node *HugeCache::Find(HugeLength n) { + HugeAddressMap::Node *curr = cache_.root(); + // invariant: curr != nullptr && curr->longest >= n + // we favor smaller gaps and lower nodes and lower addresses, in that + // order. The net effect is that we are neither a best-fit nor a + // lowest-address allocator but vaguely close to both. + HugeAddressMap::Node *best = nullptr; + while (curr && curr->longest() >= n) { + if (curr->range().len() >= n) { + if (!best || best->range().len() > curr->range().len()) { + best = curr; + } + } + + // Either subtree could contain a better fit and we don't want to + // search the whole tree. Pick a reasonable child to look at. + auto left = curr->left(); + auto right = curr->right(); + if (!left || left->longest() < n) { + curr = right; + continue; + } + + if (!right || right->longest() < n) { + curr = left; + continue; + } + + // Here, we have a nontrivial choice. + if (left->range().len() == right->range().len()) { + if (left->longest() <= right->longest()) { + curr = left; + } else { + curr = right; + } + } else if (left->range().len() < right->range().len()) { + // Here, the longest range in both children is the same...look + // in the subtree with the smaller root, as that's slightly + // more likely to be our best. 
+ curr = left; + } else { + curr = right; + } + } + return best; +} + +void HugeCache::Print(TCMalloc_Printer *out) { + const long long millis = absl::ToInt64Milliseconds(kCacheTime); + out->printf( + "HugeCache: contains unused, backed hugepage(s) " + "(kCacheTime = %lldms)\n", + millis); + // a / (a + b), avoiding division by zero + auto safe_ratio = [](double a, double b) { + const double total = a + b; + if (total == 0) return 0.0; + return a / total; + }; + + const double hit_rate = safe_ratio(hits_, misses_); + const double overflow_rate = safe_ratio(overflows_, fills_); + + out->printf( + "HugeCache: %zu / %zu hugepages cached / cache limit " + "(%.3f hit rate, %.3f overflow rate)\n", + size_.raw_num(), limit().raw_num(), hit_rate, overflow_rate); + out->printf("HugeCache: %zu MiB fast unbacked, %zu MiB periodic\n", + total_fast_unbacked_.in_bytes() / 1024 / 1024, + total_periodic_unbacked_.in_bytes() / 1024 / 1024); + UpdateSize(size()); + out->printf("HugeCache: %zu MiB*s cached since startup\n", + NHugePages(regret_).in_mib() / 1000 / 1000 / 1000); + + usage_tracker_.Report(usage_); + const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime); + const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime); + out->printf( + "HugeCache: recent usage range: %zu min - %zu curr - %zu max MiB\n", + usage_min.in_mib(), usage_.in_mib(), usage_max.in_mib()); + + const HugeLength off_peak = usage_max - usage_; + off_peak_tracker_.Report(off_peak); + const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime); + const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime); + out->printf( + "HugeCache: recent offpeak range: %zu min - %zu curr - %zu max MiB\n", + off_peak_min.in_mib(), off_peak.in_mib(), off_peak_max.in_mib()); + + const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime); + const HugeLength cache_max = size_tracker_.MaxOverTime(kCacheTime); + out->printf( + "HugeCache: recent cache range: %zu min - %zu curr - %zu max MiB\n", + cache_min.in_mib(), size_.in_mib(), cache_max.in_mib()); + + detailed_tracker_.Print(out); +} + +void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) { + hpaa->PrintI64("huge_cache_time_const", + absl::ToInt64Milliseconds(kCacheTime)); + + // a / (a + b), avoiding division by zero + auto safe_ratio = [](double a, double b) { + const double total = a + b; + if (total == 0) return 0.0; + return a / total; + }; + + const double hit_rate = safe_ratio(hits_, misses_); + const double overflow_rate = safe_ratio(overflows_, fills_); + + // number of bytes in HugeCache + hpaa->PrintI64("cached_huge_page_bytes", size_.raw_num() * kPageSize); + // max allowed bytes in HugeCache + hpaa->PrintI64("max_cached_huge_page_bytes", limit().raw_num() * kPageSize); + // lifetime cache hit rate + hpaa->PrintDouble("huge_cache_hit_rate", hit_rate); + // lifetime cache overflow rate + hpaa->PrintDouble("huge_cache_overflow_rate", overflow_rate); + // bytes eagerly unbacked by HugeCache + hpaa->PrintI64("fast_unbacked_bytes", total_fast_unbacked_.in_bytes()); + // bytes unbacked by periodic releaser thread + hpaa->PrintI64("periodic_unbacked_bytes", + total_periodic_unbacked_.in_bytes()); + UpdateSize(size()); + // memory cached since startup (in MiB*s) + hpaa->PrintI64("huge_cache_regret", + NHugePages(regret_).in_mib() / 1000 / 1000 / 1000); + + usage_tracker_.Report(usage_); + const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime); + const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime); + { + auto 
usage_stats = hpaa->CreateSubRegion("huge_cache_usage_stats"); + usage_stats.PrintI64("min_bytes", usage_min.in_bytes()); + usage_stats.PrintI64("current_bytes", usage_.in_bytes()); + usage_stats.PrintI64("max_bytes", usage_max.in_bytes()); + } + + const HugeLength off_peak = usage_max - usage_; + off_peak_tracker_.Report(off_peak); + const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime); + const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime); + { + auto usage_stats = hpaa->CreateSubRegion("huge_cache_offpeak_stats"); + usage_stats.PrintI64("min_bytes", off_peak_min.in_bytes()); + usage_stats.PrintI64("current_bytes", off_peak.in_bytes()); + usage_stats.PrintI64("max_bytes", off_peak_max.in_bytes()); + } + + const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime); + const HugeLength cache_max = size_tracker_.MaxOverTime(kCacheTime); + { + auto usage_stats = hpaa->CreateSubRegion("huge_cache_cache_stats"); + usage_stats.PrintI64("min_bytes", cache_min.in_bytes()); + usage_stats.PrintI64("current_bytes", size_.in_bytes()); + usage_stats.PrintI64("max_bytes", cache_max.in_bytes()); + } + + detailed_tracker_.PrintInPbtxt(hpaa); +} + +} // namespace tcmalloc diff --git a/tcmalloc/huge_cache.h b/tcmalloc/huge_cache.h new file mode 100644 index 000000000..807297ef2 --- /dev/null +++ b/tcmalloc/huge_cache.h @@ -0,0 +1,302 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Wrapping interface for HugeAllocator that handles backing and +// unbacking, including a hot cache of backed single hugepages. +#ifndef TCMALLOC_HUGE_CACHE_H_ +#define TCMALLOC_HUGE_CACHE_H_ +#include +#include + +#include +#include + +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +typedef void (*MemoryModifyFunction)(void *start, size_t len); + +// Assumed to tick in nanoseconds. +typedef int64_t (*ClockFunc)(); + +// Track the extreme values of a HugeLength value over the past +// kWindow (time ranges approximate.) +template +class MinMaxTracker { + public: + explicit constexpr MinMaxTracker(ClockFunc clock, absl::Duration w) + : kWindow(w), kEpochLength(kWindow / kEpochs), clock_(clock) {} + + void Report(HugeLength val); + void Print(TCMalloc_Printer *out) const; + void PrintInPbtxt(PbtxtRegion *hpaa) const; + + // If t < kEpochLength, these functions return statistics for last epoch. The + // granularity is kEpochLength (rounded up). 
+ HugeLength MaxOverTime(absl::Duration t) const; + HugeLength MinOverTime(absl::Duration t) const; + + private: + const absl::Duration kWindow; + const absl::Duration kEpochLength; + + void UpdateClock(); + + static constexpr HugeLength kMaxVal = + NHugePages(std::numeric_limits::max()); + struct Extrema { + HugeLength min, max; + + static Extrema Nil() { + Extrema e; + e.max = NHugePages(0); + e.min = kMaxVal; + return e; + } + + void Report(HugeLength n) { + max = std::max(max, n); + min = std::min(min, n); + } + bool operator==(const Extrema &other) const; + bool operator!=(const Extrema &other) const; + }; + + Extrema window_[kEpochs]{}; + size_t last_epoch_{0}; + size_t i_{0}; + ClockFunc clock_; +}; + +// Explicit instantiations are defined in huge_cache.cc. +extern template class MinMaxTracker<>; +extern template class MinMaxTracker<600>; + +template +constexpr HugeLength MinMaxTracker::kMaxVal; + +class MovingAverageTracker { + public: + explicit MovingAverageTracker(ClockFunc clock, absl::Duration time_constant, + absl::Duration resolution) + : kTimeConstant(time_constant), + kResolution(resolution), + res_per_time_constant_(kTimeConstant / kResolution), + clock_(clock), + last_update_(clock_()) {} + + void Report(HugeLength val); + + HugeLength RollingMaxAverage() const; + + private: + const absl::Duration kTimeConstant; + const absl::Duration kResolution; + const size_t res_per_time_constant_{0}; + + static constexpr HugeLength kMaxVal = + NHugePages(std::numeric_limits::max()); + + HugeLength last_max_ = NHugePages(0); + HugeLength last_val_ = NHugePages(0); + double rolling_max_average_{0}; + ClockFunc clock_; + int64_t last_update_; +}; + +class HugeCache { + public: + // For use in production + HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate, + MemoryModifyFunction unback) + : HugeCache(allocator, meta_allocate, unback, GetCurrentTimeNanos, + IsExperimentActive( + Experiment::TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE) + ? absl::Seconds(30) + : absl::Seconds(1), + IsExperimentActive( + Experiment::TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE), + IsExperimentActive( + Experiment::TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE), + false, absl::Seconds(1)) {} + + // For testing with mock clock + HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate, + MemoryModifyFunction unback, ClockFunc clock) + : HugeCache(allocator, meta_allocate, unback, clock, + IsExperimentActive( + Experiment::TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE) + ? 
absl::Seconds(30) + : absl::Seconds(1), + IsExperimentActive( + Experiment::TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE), + IsExperimentActive( + Experiment::TCMALLOC_HUGEPAGE_MOVING_AVERAGE_RELEASE), + false, absl::Seconds(1)) {} + + // For extensive testing + HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate, + MemoryModifyFunction unback, ClockFunc clock, + absl::Duration time_constant, bool use_moving_average, + bool respect_mincache_limit, bool ignore_oncepersec_release, + absl::Duration ema_resolution) + : allocator_(allocator), + cache_(meta_allocate), + kCacheTime(time_constant), + clock_(clock), + last_limit_change_(clock()), + last_regret_update_(clock()), + detailed_tracker_(clock, absl::Minutes(10)), + moving_limit_tracker_(clock, kCacheTime, ema_resolution), + usage_tracker_(clock, kCacheTime * 2), + off_peak_tracker_(clock, kCacheTime * 2), + size_tracker_(clock, kCacheTime * 2), + use_moving_average_(use_moving_average), + respect_mincache_limit_(respect_mincache_limit), + ignore_oncepersec_release_(ignore_oncepersec_release), + unback_(unback) {} + // Allocate a usable set of contiguous hugepages. Try to give out + // memory that's currently backed from the kernel if we have it available. + // *from_released is set to false if the return range is already backed; + // otherwise, it is set to true (and the caller should back it.) + HugeRange Get(HugeLength n, bool *from_released); + + // Deallocate (assumed to be backed by the kernel.) + void Release(HugeRange r); + // As Release, but the range is assumed to _not_ be backed. + void ReleaseUnbacked(HugeRange r); + + // Release to the system up to hugepages of cache contents; returns + // the number of hugepages released. + HugeLength ReleaseCachedPages(HugeLength n); + + // Backed memory available. + HugeLength size() const { return size_; } + // Total memory cached (in HugeLength * nanoseconds) + uint64_t regret() const { return regret_; } + // Current limit for how much backed memory we'll cache. + HugeLength limit() const { return limit_; } + // Sum total of unreleased requests. + HugeLength usage() const { return usage_; } + + size_t hits() const { return hits_; } + size_t misses() const { return misses_; } + uint64_t max_size() const { return max_size_.raw_num(); } + uint64_t max_rss() const { return max_rss_.raw_num(); } + uint64_t weighted_hits() const { return weighted_hits_; } + uint64_t weighted_misses() const { return weighted_misses_; } + + const MinMaxTracker<> *usage_tracker() const { return &usage_tracker_; } + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + BackingStats stats() const { + BackingStats s; + s.system_bytes = (usage() + size()).in_bytes(); + s.free_bytes = size().in_bytes(); + s.unmapped_bytes = 0; + return s; + } + + void Print(TCMalloc_Printer *out); + void PrintInPbtxt(PbtxtRegion *hpaa); + + private: + HugeAllocator *allocator_; + + // We just cache-missed a request for pages; + // should we grow? + void MaybeGrowCacheLimit(HugeLength missed); + // Check if the cache seems consistently too big. Returns the + // number of pages *evicted* (not the change in limit). + HugeLength MaybeShrinkCacheLimit(); + + // Ensure the cache contains at most hugepages, + // returning the number removed. 
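  // Note (illustrative, not part of this change): "at most" refers to the
  // `target` argument below; ShrinkCache(NHugePages(0)) therefore empties the
  // cache entirely unless respect_mincache_limit_ stops it early.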
+ HugeLength ShrinkCache(HugeLength target); + + HugeRange DoGet(HugeLength n, bool *from_released); + + HugeAddressMap::Node *Find(HugeLength n); + + HugeAddressMap cache_; + HugeLength size_{NHugePages(0)}; + + HugeLength limit_{NHugePages(10)}; + const absl::Duration kCacheTime; + + size_t hits_{0}; + size_t misses_{0}; + size_t fills_{0}; + size_t overflows_{0}; + uint64_t weighted_hits_{0}; + uint64_t weighted_misses_{0}; + + // Sum(size of Gets) - Sum(size of Releases), i.e. amount of backed + // hugepages our user currently wants to have. + void IncUsage(HugeLength n); + void DecUsage(HugeLength n); + HugeLength usage_{NHugePages(0)}; + + // This is tcmalloc::GetCurrentTimeNanos, except overridable for tests. + ClockFunc clock_; + int64_t last_limit_change_; + + // 10 hugepages is a good baseline for our cache--easily wiped away + // by periodic release, and not that much memory on any real server. + // However, we can go below it if we haven't used that much for 30 seconds. + HugeLength MinCacheLimit() const { + return respect_mincache_limit_ + ? std::min( + detailed_tracker_.MaxOverTime(absl::Seconds(30)) - usage_, + NHugePages(10)) + : NHugePages(10); + } + + uint64_t regret_{0}; // overflows if we cache 585 hugepages for 1 year + int64_t last_regret_update_; + void UpdateSize(HugeLength size); + + MinMaxTracker<600> detailed_tracker_; + + MovingAverageTracker moving_limit_tracker_; + + MinMaxTracker<> usage_tracker_; + MinMaxTracker<> off_peak_tracker_; + MinMaxTracker<> size_tracker_; + HugeLength max_size_{NHugePages(0)}; + HugeLength max_rss_{NHugePages(0)}; + + HugeLength total_fast_unbacked_{NHugePages(0)}; + HugeLength total_periodic_unbacked_{NHugePages(0)}; + + const bool use_moving_average_; + const bool respect_mincache_limit_; + const bool ignore_oncepersec_release_; + + MemoryModifyFunction unback_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_HUGE_CACHE_H_ diff --git a/tcmalloc/huge_cache_test.cc b/tcmalloc/huge_cache_test.cc new file mode 100644 index 000000000..783f85b43 --- /dev/null +++ b/tcmalloc/huge_cache_test.cc @@ -0,0 +1,578 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_cache.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/memory/memory.h" +#include "absl/random/random.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { +namespace { + +class HugeCacheTest : public testing::Test { + private: + // Allow tests to modify the clock used by the cache. + static int64_t clock_offset_; + static int64_t Clock() { + return tcmalloc::GetCurrentTimeNanos() + clock_offset_; + } + + // Use a tiny fraction of actual size so we can test aggressively. 
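  // Note (illustrative, not part of this change): AllocateFake below hands
  // out synthetic addresses (index * kHugePageSize) and backs each fake
  // hugepage with a single word in `backing`, so the tests can exercise very
  // large address ranges while committing only a few kilobytes of real memory.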
+ static void *AllocateFake(size_t bytes, size_t *actual, size_t align) { + if (bytes % kHugePageSize != 0) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, "not aligned", bytes, + kHugePageSize); + } + if (align % kHugePageSize != 0) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, "not aligned", align, + kHugePageSize); + } + *actual = bytes; + // we'll actually provide hidden backing, one word per hugepage. + bytes /= kHugePageSize; + align /= kHugePageSize; + size_t index = backing.size(); + if (index % align != 0) { + index += (align - (index & align)); + } + backing.resize(index + bytes); + void *ptr = reinterpret_cast(index * kHugePageSize); + return ptr; + } + // This isn't super good form but we'll never have more than one HAT + // extant at once. + static std::vector backing; + + // We use actual malloc for metadata allocations, but we track them so they + // can be deleted. (TODO make this an arena if we care, which I doubt) + static void *MallocMetadata(size_t size) { + metadata_bytes += size; + void *ptr = calloc(size, 1); + metadata_allocs.push_back(ptr); + return ptr; + } + static std::vector metadata_allocs; + static size_t metadata_bytes; + + // This is wordy, but necessary for mocking: + class BackingInterface { + public: + virtual void Unback(void *p, size_t len) = 0; + virtual ~BackingInterface() {} + }; + + class MockBackingInterface : public BackingInterface { + public: + MOCK_METHOD2(Unback, void(void *p, size_t len)); + }; + + static void MockUnback(void *p, size_t len) { mock_->Unback(p, len); } + + protected: + static std::unique_ptr> mock_; + + size_t HugePagesRequested() { return backing.size() - 1024; } + + size_t MetadataBytes() { return metadata_bytes; } + + HugeCacheTest() { + // We don't use the first few bytes, because things might get weird + // given zero pointers. + backing.resize(1024); + metadata_bytes = 0; + mock_ = absl::make_unique>(); + + clock_offset_ = 0; + } + + ~HugeCacheTest() override { + for (void *p : metadata_allocs) { + free(p); + } + metadata_allocs.clear(); + backing.clear(); + mock_.reset(nullptr); + } + + size_t *GetActual(HugePage p) { + size_t index = reinterpret_cast(p.start_addr()) / kHugePageSize; + return &backing[index]; + } + + void CheckPages(HugeRange r, size_t c) { + for (HugePage p = r.first; p < r.first + r.n; ++p) { + EXPECT_EQ(c, *GetActual(p)); + } + } + + void MarkPages(HugeRange r, size_t c) { + for (HugePage p = r.first; p < r.first + r.n; ++p) { + *GetActual(p) = c; + } + } + + void Advance(absl::Duration d) { clock_offset_ += ToInt64Nanoseconds(d); } + + tcmalloc::HugeAllocator alloc_{AllocateFake, MallocMetadata}; + HugeCache cache_{&alloc_, MallocMetadata, MockUnback, Clock}; +}; + +std::vector HugeCacheTest::backing; +std::vector HugeCacheTest::metadata_allocs; +size_t HugeCacheTest::metadata_bytes; +std::unique_ptr> + HugeCacheTest::mock_; + +int64_t HugeCacheTest::clock_offset_; + +TEST_F(HugeCacheTest, Basic) { + bool from; + for (int i = 0; i < 100 * 1000; ++i) { + cache_.Release(cache_.Get(NHugePages(1), &from)); + } +} + +TEST_F(HugeCacheTest, Backing) { + bool from; + cache_.Release(cache_.Get(NHugePages(4), &from)); + EXPECT_TRUE(from); + // We should be able to split up a large range... + HugeRange r1 = cache_.Get(NHugePages(3), &from); + EXPECT_FALSE(from); + HugeRange r2 = cache_.Get(NHugePages(1), &from); + EXPECT_FALSE(from); + + // and then merge it back. 
+ cache_.Release(r1); + cache_.Release(r2); + HugeRange r = cache_.Get(NHugePages(4), &from); + EXPECT_FALSE(from); + cache_.Release(r); +} + +TEST_F(HugeCacheTest, Release) { + bool from; + const HugeLength one = NHugePages(1); + cache_.Release(cache_.Get(NHugePages(5), &from)); + HugeRange r1, r2, r3, r4, r5; + r1 = cache_.Get(one, &from); + r2 = cache_.Get(one, &from); + r3 = cache_.Get(one, &from); + r4 = cache_.Get(one, &from); + r5 = cache_.Get(one, &from); + cache_.Release(r1); + cache_.Release(r2); + cache_.Release(r3); + cache_.Release(r4); + cache_.Release(r5); + + r1 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r2 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r3 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r4 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r5 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + cache_.Release(r1); + cache_.Release(r2); + cache_.Release(r5); + + ASSERT_EQ(NHugePages(3), cache_.size()); + EXPECT_CALL(*mock_, Unback(r5.start_addr(), kHugePageSize * 1)).Times(1); + EXPECT_EQ(NHugePages(1), cache_.ReleaseCachedPages(NHugePages(1))); + cache_.Release(r3); + cache_.Release(r4); + + EXPECT_CALL(*mock_, Unback(r1.start_addr(), 4 * kHugePageSize)).Times(1); + EXPECT_EQ(NHugePages(4), cache_.ReleaseCachedPages(NHugePages(200))); +} + +TEST_F(HugeCacheTest, Regret) { + bool from; + HugeRange r = cache_.Get(NHugePages(20), &from); + cache_.Release(r); + HugeLength cached = cache_.size(); + absl::Duration d = absl::Seconds(20); + Advance(d); + char buf[512]; + TCMalloc_Printer out(buf, 512); + cache_.Print(&out); // To update the regret + uint64_t expected_regret = absl::ToInt64Nanoseconds(d) * cached.raw_num(); + // Not exactly accurate since the mock clock advances with real time, and + // when we measure regret will be updated. 
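  // Note (illustrative, not part of this change): here that is 20 cached
  // hugepages held for about 20s, i.e. roughly 20 * 20e9 = 4e11
  // hugepage-nanoseconds; the 0.1% tolerance absorbs the real time that
  // passes around the mock Advance().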
+ EXPECT_NEAR(cache_.regret(), expected_regret, expected_regret / 1000); + EXPECT_GE(cache_.regret(), expected_regret); +} + +TEST_F(HugeCacheTest, Stats) { + bool from; + HugeRange r = cache_.Get(NHugePages(1 + 1 + 2 + 1 + 3), &from); + HugeRange r1, r2, r3, spacer1, spacer2; + std::tie(r1, spacer1) = Split(r, NHugePages(1)); + std::tie(spacer1, r2) = Split(spacer1, NHugePages(1)); + std::tie(r2, spacer2) = Split(r2, NHugePages(2)); + std::tie(spacer2, r3) = Split(spacer2, NHugePages(1)); + cache_.Release(r1); + cache_.Release(r2); + cache_.Release(r3); + + ASSERT_EQ(NHugePages(6), cache_.size()); + r1 = cache_.Get(NHugePages(1), &from); + ASSERT_EQ(false, from); + r2 = cache_.Get(NHugePages(2), &from); + ASSERT_EQ(false, from); + r3 = cache_.Get(NHugePages(3), &from); + ASSERT_EQ(false, from); + + struct Helper { + static void Stat(const HugeCache &cache, size_t *spans, + size_t *pages_backed, size_t *pages_unbacked, + double *avg_age) { + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + LargeSpanStats large; + memset(&large, 0, sizeof(large)); + cache.AddSpanStats(nullptr, &large, &ages); + + const PageAgeHistograms::Histogram *hist = ages.GetTotalHistogram(false); + *spans = large.spans; + *pages_backed = large.normal_pages; + *pages_unbacked = large.returned_pages; + *avg_age = hist->avg_age(); + } + }; + + double avg_age; + size_t spans; + size_t pages_backed; + size_t pages_unbacked; + + cache_.Release(r1); + absl::SleepFor(absl::Microseconds(5000)); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + EXPECT_EQ(0, pages_unbacked); + EXPECT_EQ(1, spans); + EXPECT_EQ(NHugePages(1).in_pages(), pages_backed); + EXPECT_LE(0.005, avg_age); + + cache_.Release(r2); + absl::SleepFor(absl::Microseconds(2500)); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + EXPECT_EQ(0, pages_unbacked); + EXPECT_EQ(2, spans); + EXPECT_EQ(NHugePages(3).in_pages(), pages_backed); + EXPECT_LE((0.0075 * 1 + 0.0025 * 2) / (1 + 2), avg_age); + + cache_.Release(r3); + absl::SleepFor(absl::Microseconds(1250)); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + EXPECT_EQ(0, pages_unbacked); + EXPECT_EQ(3, spans); + EXPECT_EQ(NHugePages(6).in_pages(), pages_backed); + EXPECT_LE((0.00875 * 1 + 0.00375 * 2 + 0.00125 * 3) / (1 + 2 + 3), avg_age); +} + +static double Frac(HugeLength num, HugeLength denom) { + return static_cast(num.raw_num()) / denom.raw_num(); +} + +TEST_F(HugeCacheTest, Growth) { + bool released; + absl::BitGen rng; + // fragmentation is a bit of a challenge + std::uniform_int_distribution sizes(1, 5); + // fragment the cache badly. + std::vector keep; + std::vector drop; + for (int i = 0; i < 1000; ++i) { + auto &l = std::bernoulli_distribution()(rng) ? keep : drop; + l.push_back(cache_.Get(NHugePages(sizes(rng)), &released)); + } + + for (auto r : drop) { + cache_.Release(r); + } + + // See the TODO in HugeCache::MaybeGrowCache; without this delay, + // the above fragmentation plays merry havoc with our instrumentation. + Advance(absl::Seconds(30)); + + // Test that our cache can grow to fit a working set. + HugeLength hot_set_sizes[] = {NHugePages(5), NHugePages(10), NHugePages(100), + NHugePages(10000)}; + + for (const HugeLength hot : hot_set_sizes) { + SCOPED_TRACE(absl::StrCat("cache size = ", hot.in_bytes() / 1024.0 / 1024.0, + " MiB")); + // Exercise the cache allocating about worth of data. After + // a brief warmup phase, we should do this without needing to back much. 
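    // Note (illustrative, not part of this change): the headroom this relies
    // on comes from MaybeGrowCacheLimit earlier in this diff. A hot set that
    // is fully released and re-requested within kCacheTime yields
    // shrink == grow == hot, so the limit settles near the dip plus 10%
    // slack, which is what the 1.1 ratio bound below allows.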
+ auto alloc = [&]() -> std::pair { + HugeLength got = NHugePages(0); + HugeLength needed_backing = NHugePages(0); + std::vector items; + while (got < hot) { + HugeLength rest = hot - got; + HugeLength l = std::min(rest, NHugePages(sizes(rng))); + got += l; + items.push_back(cache_.Get(l, &released)); + if (released) needed_backing += l; + } + for (auto r : items) { + cache_.Release(r); + } + return {needed_backing, got}; + }; + + // warmup - we're allowed to incur misses and be too big. + for (int i = 0; i < 2; ++i) { + alloc(); + } + + HugeLength needed_backing = NHugePages(0); + HugeLength total = NHugePages(0); + for (int i = 0; i < 16; ++i) { + auto r = alloc(); + needed_backing += r.first; + total += r.second; + // Cache shouldn't have just grown arbitrarily + const HugeLength cached = cache_.size(); + // Allow us 10% slop, but don't get out of bed for tiny caches anyway. + const double ratio = Frac(cached, hot); + SCOPED_TRACE( + absl::StrCat(cached.raw_num(), "hps ", Frac(r.first, r.second))); + if (ratio > 1 && cached > NHugePages(16)) { + EXPECT_LE(ratio, 1.1); + } + } + // approximately, given the randomized sizing... + + const double ratio = Frac(needed_backing, total); + EXPECT_LE(ratio, 0.2); + } +} + +// If we repeatedly grow and shrink, but do so very slowly, we should *not* +// cache the large variation. +TEST_F(HugeCacheTest, SlowGrowthUncached) { + absl::BitGen rng; + std::uniform_int_distribution sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector rs; + for (int j = 0; j < 20; ++j) { + Advance(absl::Milliseconds(600)); + bool released; + rs.push_back(cache_.Get(NHugePages(sizes(rng)), &released)); + } + HugeLength max_cached = NHugePages(0); + for (auto r : rs) { + Advance(absl::Milliseconds(600)); + cache_.Release(r); + max_cached = std::max(max_cached, cache_.size()); + } + EXPECT_GE(NHugePages(10), max_cached); + } +} + +// If very rarely we have a huge increase in usage, it shouldn't be cached. +TEST_F(HugeCacheTest, SpikesUncached) { + absl::BitGen rng; + std::uniform_int_distribution sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector rs; + for (int j = 0; j < 2000; ++j) { + bool released; + rs.push_back(cache_.Get(NHugePages(sizes(rng)), &released)); + } + HugeLength max_cached = NHugePages(0); + for (auto r : rs) { + cache_.Release(r); + max_cached = std::max(max_cached, cache_.size()); + } + EXPECT_GE(NHugePages(10), max_cached); + Advance(absl::Seconds(30)); + } +} + +// If very rarely we have a huge *decrease* in usage, it *should* be cached. +TEST_F(HugeCacheTest, DipsCached) { + absl::BitGen rng; + std::uniform_int_distribution sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector rs; + HugeLength got = NHugePages(0); + HugeLength uncached = NHugePages(0); + for (int j = 0; j < 2000; ++j) { + bool released; + HugeLength n = NHugePages(sizes(rng)); + rs.push_back(cache_.Get(n, &released)); + got += n; + if (released) uncached += n; + } + // Most of our time is at high usage... + Advance(absl::Seconds(30)); + // Now immediately release and reallocate. + for (auto r : rs) { + cache_.Release(r); + } + + // warmup + if (i >= 2) { + EXPECT_GE(0.06, Frac(uncached, got)); + } + } +} + +// Suppose in a previous era of behavior we needed a giant cache, +// but now we don't. Do we figure this out promptly? 
+TEST_F(HugeCacheTest, Shrink) { + absl::BitGen rng; + std::uniform_int_distribution sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector rs; + for (int j = 0; j < 2000; ++j) { + HugeLength n = NHugePages(sizes(rng)); + bool released; + rs.push_back(cache_.Get(n, &released)); + } + for (auto r : rs) { + cache_.Release(r); + } + } + + ASSERT_LE(NHugePages(10000), cache_.size()); + + for (int i = 0; i < 30; ++i) { + // New working set <= 20 pages. + Advance(absl::Seconds(1)); + + // And do some work. + for (int j = 0; j < 100; ++j) { + bool released; + HugeRange r1 = cache_.Get(NHugePages(sizes(rng)), &released); + HugeRange r2 = cache_.Get(NHugePages(sizes(rng)), &released); + cache_.Release(r1); + cache_.Release(r2); + } + } + + ASSERT_GE(NHugePages(25), cache_.limit()); +} + +TEST_F(HugeCacheTest, Usage) { + bool released; + + auto r1 = cache_.Get(NHugePages(10), &released); + EXPECT_EQ(NHugePages(10), cache_.usage()); + + auto r2 = cache_.Get(NHugePages(100), &released); + EXPECT_EQ(NHugePages(110), cache_.usage()); + + cache_.Release(r1); + EXPECT_EQ(NHugePages(100), cache_.usage()); + + // Pretend we unbacked this. + cache_.ReleaseUnbacked(r2); + EXPECT_EQ(NHugePages(0), cache_.usage()); +} + +class MinMaxTrackerTest : public testing::Test { + private: + static int64_t clock_; + + static int64_t FakeClock() { return clock_; } + + protected: + absl::Duration duration_ = absl::Seconds(2); + + tcmalloc::MinMaxTracker<> tracker_{FakeClock, duration_}; + + void Advance(absl::Duration d) { clock_ += ToInt64Nanoseconds(d); } + void CheckMax(HugeLength expected) { + EXPECT_EQ(expected, tracker_.MaxOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(expected, tracker_.MaxOverTime(duration_ / 2)); + EXPECT_EQ(expected, tracker_.MaxOverTime(duration_)); + } + void CheckMin(HugeLength expected) { + EXPECT_EQ(expected, tracker_.MinOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(expected, tracker_.MinOverTime(duration_ / 2)); + EXPECT_EQ(expected, tracker_.MinOverTime(duration_)); + } +}; + +int64_t MinMaxTrackerTest::clock_{0}; + +TEST_F(MinMaxTrackerTest, Works) { + tracker_.Report(NHugePages(0)); + CheckMax(NHugePages(0)); + CheckMin(NHugePages(0)); + + tracker_.Report(NHugePages(10)); + CheckMax(NHugePages(10)); + CheckMin(NHugePages(0)); + + tracker_.Report(NHugePages(5)); + CheckMax(NHugePages(10)); + CheckMin(NHugePages(0)); + + tracker_.Report(NHugePages(100)); + CheckMax(NHugePages(100)); + CheckMin(NHugePages(0)); + + // Some tests for advancing time + Advance(duration_ / 3); + tracker_.Report(NHugePages(2)); + EXPECT_EQ(NHugePages(2), tracker_.MaxOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(100), tracker_.MaxOverTime(duration_ / 2)); + EXPECT_EQ(NHugePages(100), tracker_.MaxOverTime(duration_)); + EXPECT_EQ(NHugePages(2), tracker_.MinOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(0), tracker_.MinOverTime(duration_ / 2)); + EXPECT_EQ(NHugePages(0), tracker_.MinOverTime(duration_)); + + Advance(duration_ / 3); + tracker_.Report(NHugePages(5)); + EXPECT_EQ(NHugePages(5), tracker_.MaxOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(5), tracker_.MaxOverTime(duration_ / 2)); + EXPECT_EQ(NHugePages(100), tracker_.MaxOverTime(duration_)); + EXPECT_EQ(NHugePages(5), tracker_.MinOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(2), tracker_.MinOverTime(duration_ / 2)); + EXPECT_EQ(NHugePages(0), tracker_.MinOverTime(duration_)); + + // This should annilate everything. 
+ Advance(duration_ * 2); + tracker_.Report(NHugePages(1)); + CheckMax(NHugePages(1)); + CheckMin(NHugePages(1)); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/huge_page_aware_allocator.cc b/tcmalloc/huge_page_aware_allocator.cc new file mode 100644 index 000000000..82004b962 --- /dev/null +++ b/tcmalloc/huge_page_aware_allocator.cc @@ -0,0 +1,621 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_page_aware_allocator.h" + +#include +#include + +#include + +#include "absl/base/internal/cycleclock.h" +#include "absl/base/internal/spinlock.h" +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/span.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +bool decide_want_hpaa(); +ABSL_ATTRIBUTE_WEAK int default_subrelease(); + +bool decide_subrelease() { + if (!decide_want_hpaa()) { + // Subrelease is off if HPAA is off. + return false; + } + + const char *e = + tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); + if (e) { + if (e[0] == '0') return false; + if (e[0] == '1') return false; + if (e[0] == '2') return true; + Log(kCrash, __FILE__, __LINE__, "bad env var", e); + return false; + } + + if (default_subrelease != nullptr) { + const int decision = default_subrelease(); + if (decision != 0) { + return decision > 0; + } + } + + if (tcmalloc::IsExperimentActive(tcmalloc::Experiment::TCMALLOC_TEMERAIRE)) { + return false; + } + + return true; +} + +} // namespace tcmalloc + +namespace tcmalloc { + +// Some notes: locking discipline here is a bit funny, because +// we want to *not* hold the pageheap lock while backing memory. + +// We have here a collection of slightly different allocators each +// optimized for slightly different purposes. This file has two main purposes: +// - pick the right one for a given allocation +// - provide enough data to figure out what we picked last time! + +HugePageAwareAllocator::HugePageAwareAllocator(bool tagged) + : PageAllocatorInterface("HugePageAware", tagged), + alloc_(tagged ? 
AllocAndReport : AllocAndReport, + MetaDataAlloc), + cache_(HugeCache{&alloc_, MetaDataAlloc, UnbackWithoutLock}) { + tracker_allocator_.Init(Static::arena()); + region_allocator_.Init(Static::arena()); +} + +HugePageAwareAllocator::FillerType::Tracker *HugePageAwareAllocator::GetTracker( + HugePage p) { + void *v = Static::pagemap()->GetHugepage(p.first_page()); + FillerType::Tracker *pt = reinterpret_cast(v); + ASSERT(pt == nullptr || pt->location() == p); + return pt; +} + +void HugePageAwareAllocator::SetTracker( + HugePage p, HugePageAwareAllocator::FillerType::Tracker *pt) { + Static::pagemap()->SetHugepage(p.first_page(), pt); +} + +PageID HugePageAwareAllocator::AllocAndContribute(HugePage p, Length n, + bool donated) { + CHECK_CONDITION(p.start_addr() != nullptr); + FillerType::Tracker *pt = tracker_allocator_.New(); + new (pt) FillerType::Tracker(p, absl::base_internal::CycleClock::Now()); + ASSERT(pt->longest_free_range() >= n); + PageID page = pt->Get(n); + ASSERT(page == p.first_page()); + SetTracker(p, pt); + filler_.Contribute(pt, donated); + return page; +} + +PageID HugePageAwareAllocator::RefillFiller(Length n, bool *from_released) { + HugeRange r = cache_.Get(NHugePages(1), from_released); + if (!r.valid()) return 0; + // This is duplicate to Finalize, but if we need to break up + // hugepages to get to our usage limit it would be very bad to break + // up what's left of r after we allocate from there--while r is + // mostly empty, clearly what's left in the filler is too fragmented + // to be very useful, and we would rather release those + // pages. Otherwise, we're nearly guaranteed to release r (if n + // isn't very large), and the next allocation will just repeat this + // process. + Static::page_allocator()->ShrinkToUsageLimit(); + return AllocAndContribute(r.start(), n, /*donated=*/false); +} + +Span *HugePageAwareAllocator::Finalize(Length n, PageID page) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + if (page == 0) return nullptr; + Span *ret = Span::New(page, n); + Static::pagemap()->Set(page, ret); + ASSERT(!ret->sampled()); + info_.RecordAlloc(page, n); + Static::page_allocator()->ShrinkToUsageLimit(); + return ret; +} + +// For anything <= half a huge page, we will unconditionally use the filler +// to pack it into a single page. If we need another page, that's fine. +Span *HugePageAwareAllocator::AllocSmall(Length n, bool *from_released) { + PageID page; + FillerType::Tracker *pt; + if (filler_.TryGet(n, &pt, &page)) { + *from_released = false; + return Finalize(n, page); + } + + page = RefillFiller(n, from_released); + return Finalize(n, page); +} + +Span *HugePageAwareAllocator::AllocLarge(Length n, bool *from_released) { + // If it's an exact page multiple, just pull it from pages directly. + HugeLength hl = HLFromPages(n); + if (hl.in_pages() == n) { + return AllocRawHugepages(n, from_released); + } + + PageID page; + // If we fit in a single hugepage, try the Filler first. + if (n < kPagesPerHugePage) { + FillerType::Tracker *pt; + if (filler_.TryGet(n, &pt, &page)) { + *from_released = false; + return Finalize(n, page); + } + } + + // If we're using regions in this binary (see below comment), is + // there currently available space there? + if (regions_.MaybeGet(n, &page, from_released)) { + return Finalize(n, page); + } + + // We have two choices here: allocate a new region or go to + // hugepages directly (hoping that slack will be filled by small + // allocation.) 
The second strategy is preferrable, as it's + // typically faster and usually more space efficient, but it's sometimes + // catastrophic. + // + // So test directly if we're in the bad case--almost no binaries are. + // If not, just fall back to direct allocation (and hope we do hit that case!) + const Length slack = info_.slack(); + // Don't bother at all until the binary is reasonably sized + if (slack < HLFromBytes(64 * 1024 * 1024).in_pages()) { + return AllocRawHugepages(n, from_released); + } + + // In the vast majority of binaries, we have many small allocations which + // will nicely fill slack. (Fleetwide, the average ratio is 15:1; only + // a handful of binaries fall below 1:1.) + const Length small = info_.small(); + if (slack < small) { + return AllocRawHugepages(n, from_released); + } + + // We couldn't allocate a new region. They're oversized, so maybe we'd get + // lucky with a smaller request? + if (!AddRegion()) { + return AllocRawHugepages(n, from_released); + } + + CHECK_CONDITION(regions_.MaybeGet(n, &page, from_released)); + return Finalize(n, page); +} + +Span *HugePageAwareAllocator::AllocEnormous(Length n, bool *from_released) { + return AllocRawHugepages(n, from_released); +} + +Span *HugePageAwareAllocator::AllocRawHugepages(Length n, bool *from_released) { + HugeLength hl = HLFromPages(n); + + HugeRange r = cache_.Get(hl, from_released); + if (!r.valid()) return nullptr; + + // We now have a huge page range that covers our request. There + // might be some slack in it if n isn't a multiple of + // kPagesPerHugePage. Add the hugepage with slack to the filler, + // pretending the non-slack portion is a smaller allocation. + Length total = hl.in_pages(); + Length slack = total - n; + HugePage first = r.start(); + SetTracker(first, nullptr); + HugePage last = first + r.len() - NHugePages(1); + if (slack == 0) { + SetTracker(last, nullptr); + return Finalize(total, r.start().first_page()); + } + + ++donated_huge_pages_; + + Length here = kPagesPerHugePage - slack; + ASSERT(here > 0); + AllocAndContribute(last, here, /*donated=*/true); + return Finalize(n, r.start().first_page()); +} + +static void BackSpan(Span *span) { + SystemBack(span->start_address(), span->bytes_in_span()); +} + +// public +Span *HugePageAwareAllocator::New(Length n) { + CHECK_CONDITION(n > 0); + bool from_released; + Span *s = LockAndAlloc(n, &from_released); + if (s && from_released) BackSpan(s); + ASSERT(!s || IsTaggedMemory(s->start_address()) == tagged_); + return s; +} + +Span *HugePageAwareAllocator::LockAndAlloc(Length n, bool *from_released) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + // Our policy depends on size. For small things, we will pack them + // into single hugepages. + if (n <= kPagesPerHugePage / 2) { + return AllocSmall(n, from_released); + } + + // For anything too big for the filler, we use either a direct hugepage + // allocation, or possibly the regions if we are worried about slack. + if (n <= Region::size().in_pages()) { + return AllocLarge(n, from_released); + } + + // In the worst case, we just fall back to directly allocating a run + // of hugepages. + return AllocEnormous(n, from_released); +} + +// public +Span *HugePageAwareAllocator::NewAligned(Length n, Length align) { + if (align <= 1) { + return New(n); + } + + // we can do better than this, but... + // TODO(b/134690769): support higher align. 
+ CHECK_CONDITION(align <= kPagesPerHugePage); + bool from_released; + Span *s; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + s = AllocRawHugepages(n, &from_released); + } + if (s && from_released) BackSpan(s); + ASSERT(!s || IsTaggedMemory(s->start_address()) == tagged_); + return s; +} + +void HugePageAwareAllocator::DeleteFromHugepage(FillerType::Tracker *pt, + PageID p, Length n) { + if (filler_.Put(pt, p, n) == nullptr) return; + ReleaseHugepage(pt); +} + +bool HugePageAwareAllocator::AddRegion() { + HugeRange r = alloc_.Get(Region::size()); + if (!r.valid()) return false; + Region *region = region_allocator_.New(); + new (region) Region(r); + regions_.Contribute(region); + return true; +} + +void HugePageAwareAllocator::Delete(Span *span) { + ASSERT(!span || IsTaggedMemory(span->start_address()) == tagged_); + PageID p = span->first_page(); + HugePage hp = HugePageContaining(p); + Length n = span->num_pages(); + info_.RecordFree(p, n); + + Span::Delete(span); + + // The tricky part, as with so many allocators: where did we come from? + // There are several possibilities. + FillerType::Tracker *pt = GetTracker(hp); + // a) We got packed by the filler onto a single hugepage - return our + // allocation to that hugepage in the filler. + if (pt != nullptr) { + ASSERT(hp == HugePageContaining(p + n - 1)); + DeleteFromHugepage(pt, p, n); + return; + } + + // b) We got put into a region, possibly crossing hugepages - + // return our allocation to the region. + if (regions_.MaybePut(p, n)) return; + + // c) we came straight from the HugeCache - return straight there. (We + // might have had slack put into the filler - if so, return that virtual + // allocation to the filler too!) + ASSERT(n >= kPagesPerHugePage); + HugeLength hl = HLFromPages(n); + HugePage last = hp + hl - NHugePages(1); + Length slack = hl.in_pages() - n; + if (slack == 0) { + ASSERT(GetTracker(last) == nullptr); + } else { + pt = GetTracker(last); + CHECK_CONDITION(pt != nullptr); + // We put the slack into the filler (see AllocEnormous.) + // Handle this page separately as a virtual allocation + // onto the last hugepage. + PageID virt = last.first_page(); + Length virt_len = kPagesPerHugePage - slack; + pt = filler_.Put(pt, virt, virt_len); + // We may have used the slack, which would prevent us from returning + // the entire range now. If filler returned a Tracker, we are fully empty. + if (pt == nullptr) { + // Last page isn't empty -- pretend the range was shorter. + --hl; + } else { + // Last page was empty - but if we sub-released it, we still + // have to split it off and release it independently.) + if (pt->released()) { + --hl; + ReleaseHugepage(pt); + } else { + // Get rid of the tracker *object*, but not the *hugepage* + // (which is still part of our range.) We were able to reclaim the + // contributed slack. 
+ --donated_huge_pages_; + SetTracker(pt->location(), nullptr); + tracker_allocator_.Delete(pt); + } + } + } + cache_.Release({hp, hl}); +} + +void HugePageAwareAllocator::ReleaseHugepage(FillerType::Tracker *pt) { + ASSERT(pt->used_pages() == 0); + HugeRange r = {pt->location(), NHugePages(1)}; + SetTracker(pt->location(), nullptr); + + if (pt->released()) { + cache_.ReleaseUnbacked(r); + } else { + cache_.Release(r); + } + + tracker_allocator_.Delete(pt); +} + +// public +BackingStats HugePageAwareAllocator::stats() const { + BackingStats stats = alloc_.stats(); + const auto actual_system = stats.system_bytes; + stats += cache_.stats(); + stats += filler_.stats(); + stats += regions_.stats(); + // the "system" (total managed) byte count is wildly double counted, + // since it all comes from HugeAllocator but is then managed by + // cache/regions/filler. Adjust for that. + stats.system_bytes = actual_system; + return stats; +} + +// public +void HugePageAwareAllocator::GetSmallSpanStats(SmallSpanStats *result) { + GetSpanStats(result, nullptr, nullptr); +} + +// public +void HugePageAwareAllocator::GetLargeSpanStats(LargeSpanStats *result) { + GetSpanStats(nullptr, result, nullptr); +} + +void HugePageAwareAllocator::GetSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) { + if (small != nullptr) { + memset(small, 0, sizeof(*small)); + } + if (large != nullptr) { + memset(large, 0, sizeof(*large)); + } + + alloc_.AddSpanStats(small, large, ages); + filler_.AddSpanStats(small, large, ages); + regions_.AddSpanStats(small, large, ages); + cache_.AddSpanStats(small, large, ages); +} + +// public +Length HugePageAwareAllocator::ReleaseAtLeastNPages(Length num_pages) { + Length released = 0; + released += cache_.ReleaseCachedPages(HLFromPages(num_pages)).in_pages(); + + // This is our long term plan but in current state will lead to insufficent + // THP coverage. It is however very useful to have the ability to turn this on + // for testing. + // TODO(b/134690769): make this work, remove the flag guard. + if (Parameters::hpaa_subrelease()) { + while (released < num_pages) { + Length got = filler_.ReleasePages(); + if (got == 0) break; + released += got; + } + } + + // TODO(b/134690769): + // - perhaps release region? + // - refuse to release if we're too close to zero? 
+ info_.RecordRelease(num_pages, released); + return released; +} + +static double BytesToMiB(size_t bytes) { + const double MiB = 1048576.0; + return bytes / MiB; +} + +static void BreakdownStats(TCMalloc_Printer *out, const BackingStats &s, + const char *label) { + out->printf("%s %6.1f MiB used, %6.1f MiB free, %6.1f MiB unmapped\n", label, + BytesToMiB(s.system_bytes - s.free_bytes - s.unmapped_bytes), + BytesToMiB(s.free_bytes), BytesToMiB(s.unmapped_bytes)); +} + +static void BreakdownStatsInPbtxt(PbtxtRegion *hpaa, const BackingStats &s, + const char *key) { + auto usage = hpaa->CreateSubRegion(key); + usage.PrintI64("used", s.system_bytes - s.free_bytes - s.unmapped_bytes); + usage.PrintI64("free", s.free_bytes); + usage.PrintI64("unmapped", s.unmapped_bytes); +} + +// public +void HugePageAwareAllocator::Print(TCMalloc_Printer *out) { Print(out, true); } + +void HugePageAwareAllocator::Print(TCMalloc_Printer *out, bool everything) { + SmallSpanStats small; + LargeSpanStats large; + BackingStats bstats; + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + bstats = stats(); + GetSpanStats(&small, &large, &ages); + PrintStats("HugePageAware", out, bstats, small, large, everything); + out->printf( + "\nHuge page aware allocator components:\n" + "------------------------------------------------\n"); + out->printf("HugePageAware: breakdown of used / free / unmapped space:\n"); + + auto fstats = filler_.stats(); + BreakdownStats(out, fstats, "HugePageAware: filler"); + + auto rstats = regions_.stats(); + BreakdownStats(out, rstats, "HugePageAware: region"); + + auto cstats = cache_.stats(); + // Everything in the filler came from the cache - + // adjust the totals so we see the amount used by the mutator. + cstats.system_bytes -= fstats.system_bytes; + BreakdownStats(out, cstats, "HugePageAware: cache "); + + auto astats = alloc_.stats(); + // Everything in *all* components came from here - + // so again adjust the totals. + astats.system_bytes -= (fstats + rstats + cstats).system_bytes; + BreakdownStats(out, astats, "HugePageAware: alloc "); + out->printf("\n"); + + out->printf("HugePageAware: filler donations %zu\n", + donated_huge_pages_.raw_num()); + + // Component debug output + // Filler is by far the most important; print (some) of it + // unconditionally. + filler_.Print(out, everything); + out->printf("\n"); + if (everything) { + regions_.Print(out); + out->printf("\n"); + cache_.Print(out); + out->printf("\n"); + alloc_.Print(out); + out->printf("\n"); + + // Use statistics + info_.Print(out); + + // and age tracking. + ages.Print("HugePageAware", out); + } + + out->printf("PARAMETER hpaa_subrelease %d\n", + Parameters::hpaa_subrelease() ? 
1 : 0); +} + +void HugePageAwareAllocator::PrintInPbtxt(PbtxtRegion *region) { + SmallSpanStats small; + LargeSpanStats large; + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + GetSpanStats(&small, &large, &ages); + PrintStatsInPbtxt(region, small, large, ages); + { + auto hpaa = region->CreateSubRegion("huge_page_allocator"); + hpaa.PrintBool("using_hpaa", true); + hpaa.PrintBool("using_hpaa_subrelease", Parameters::hpaa_subrelease()); + + // Fill HPAA Usage + auto fstats = filler_.stats(); + BreakdownStatsInPbtxt(&hpaa, fstats, "filler_usage"); + + auto rstats = regions_.stats(); + BreakdownStatsInPbtxt(&hpaa, rstats, "region_usage"); + + auto cstats = cache_.stats(); + // Everything in the filler came from the cache - + // adjust the totals so we see the amount used by the mutator. + cstats.system_bytes -= fstats.system_bytes; + BreakdownStatsInPbtxt(&hpaa, cstats, "cache_usage"); + + auto astats = alloc_.stats(); + // Everything in *all* components came from here - + // so again adjust the totals. + astats.system_bytes -= (fstats + rstats + cstats).system_bytes; + BreakdownStatsInPbtxt(&hpaa, astats, "alloc_usage"); + + filler_.PrintInPbtxt( + &hpaa, fstats.system_bytes - fstats.free_bytes - fstats.unmapped_bytes); + regions_.PrintInPbtxt(&hpaa); + cache_.PrintInPbtxt(&hpaa); + alloc_.PrintInPbtxt(&hpaa); + + // Use statistics + info_.PrintInPbtxt(&hpaa, "hpaa_stat"); + + hpaa.PrintI64("filler_donated_huge_pages", donated_huge_pages_.raw_num()); + } +} + +template +void *HugePageAwareAllocator::AllocAndReport(size_t bytes, size_t *actual, + size_t align) { + void *p = SystemAlloc(bytes, actual, align, tagged); + if (p == nullptr) return p; + const PageID page = reinterpret_cast(p) >> kPageShift; + const Length page_len = (*actual) >> kPageShift; + Static::pagemap()->Ensure(page, page_len); + return p; +} + +void *HugePageAwareAllocator::MetaDataAlloc(size_t bytes) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return Static::arena()->Alloc(bytes); +} + +Length HugePageAwareAllocator::ReleaseAtLeastNPagesBreakingHugepages(Length n) { + // We desparately need to release memory, and are willing to + // compromise on hugepage usage. That means releasing from the filler. + Length ret = 0; + + while (ret < n) { + Length got = filler_.ReleasePages(); + if (got == 0) break; + ret += got; + } + + return ret; +} + +void HugePageAwareAllocator::UnbackWithoutLock(void *start, size_t length) { + pageheap_lock.Unlock(); + SystemRelease(start, length); + pageheap_lock.Lock(); +} + +} // namespace tcmalloc diff --git a/tcmalloc/huge_page_aware_allocator.h b/tcmalloc/huge_page_aware_allocator.h new file mode 100644 index 000000000..3bb887b42 --- /dev/null +++ b/tcmalloc/huge_page_aware_allocator.h @@ -0,0 +1,168 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
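Before the header below, a hedged standalone sketch of the size-based dispatch that LockAndAlloc in the .cc above implements: the filler handles anything up to half a hugepage, AllocLarge handles requests up to the region size, and everything larger goes straight to raw hugepages. The concrete constants here (kPagesPerHugePageGuess, kRegionPagesGuess) are illustrative assumptions for the example only; the real values come from the build configuration and Region::size().

#include <cstddef>
#include <cstdio>

// Sketch only: mirrors the three-way policy of
// HugePageAwareAllocator::LockAndAlloc shown above.
enum class Path { kAllocSmall, kAllocLarge, kAllocEnormous };

Path Dispatch(size_t n, size_t pages_per_hugepage, size_t region_pages) {
  if (n <= pages_per_hugepage / 2) return Path::kAllocSmall;  // packed by the filler
  if (n <= region_pages) return Path::kAllocLarge;            // filler/region/raw, as above
  return Path::kAllocEnormous;                                // raw hugepages
}

int main() {
  // Assumed for illustration: 2 MiB hugepages of 8 KiB pages, 1 GiB regions.
  const size_t kPagesPerHugePageGuess = 256;
  const size_t kRegionPagesGuess = (1024 * 1024 * 1024) / (8 * 1024);
  std::printf("%d\n", static_cast<int>(Dispatch(100, kPagesPerHugePageGuess, kRegionPagesGuess)));      // 0: small
  std::printf("%d\n", static_cast<int>(Dispatch(10000, kPagesPerHugePageGuess, kRegionPagesGuess)));    // 1: large
  std::printf("%d\n", static_cast<int>(Dispatch(1 << 20, kPagesPerHugePageGuess, kRegionPagesGuess)));  // 2: enormous
  return 0;
}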
+ +#ifndef TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_ +#define TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_ + +#include + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_page_filler.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/huge_region.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_allocator_interface.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" + +namespace tcmalloc { + +bool decide_subrelease(); + +// An implementation of the PageAllocator interface that is hugepage-efficent. +// Attempts to pack allocations into full hugepages wherever possible, +// and aggressively returns empty ones to the system. +class HugePageAwareAllocator : public PageAllocatorInterface { + public: + explicit HugePageAwareAllocator(bool tagged); + + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + Span* New(Length n) LOCKS_EXCLUDED(pageheap_lock) override; + + // As New, but the returned span is aligned to a -page boundary. + // must be a power of two. + Span* NewAligned(Length n, Length align) + LOCKS_EXCLUDED(pageheap_lock) override; + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. + void Delete(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + BackingStats stats() const EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + void GetSmallSpanStats(SmallSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + void GetLargeSpanStats(LargeSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + Length ReleaseAtLeastNPagesBreakingHugepages(Length n) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Prints stats about the page heap to *out. + void Print(TCMalloc_Printer* out) LOCKS_EXCLUDED(pageheap_lock) override; + + // Print stats to *out, excluding long/likely uninteresting things + // unless is true. + void Print(TCMalloc_Printer* out, bool everything) + LOCKS_EXCLUDED(pageheap_lock); + + void PrintInPbtxt(PbtxtRegion* region) LOCKS_EXCLUDED(pageheap_lock) override; + + HugeLength DonatedHugePages() const EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return donated_huge_pages_; + } + + const HugeCache* cache() const { return &cache_; } + + private: + typedef HugePageFiller> FillerType; + FillerType filler_; + + // Calls SystemRelease, but with dropping of pageheap_lock around the call. 
+ static void UnbackWithoutLock(void* start, size_t length) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + typedef HugeRegion Region; + HugeRegionSet regions_; + + PageHeapAllocator tracker_allocator_; + PageHeapAllocator region_allocator_; + + FillerType::Tracker* GetTracker(HugePage p); + + void SetTracker(HugePage p, FillerType::Tracker* pt); + + template + static void* AllocAndReport(size_t bytes, size_t* actual, size_t align) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + static void* MetaDataAlloc(size_t bytes); + HugeAllocator alloc_; + HugeCache cache_; + + // donated_huge_pages_ measures the number of huge pages contributed to the + // filler from left overs of large huge page allocations. When the large + // allocation is deallocated, we decrement this count *if* we were able to + // fully reassemble the address range (that is, the partial hugepage did not + // get stuck in the filler). + HugeLength donated_huge_pages_ GUARDED_BY(pageheap_lock); + + void GetSpanStats(SmallSpanStats* small, LargeSpanStats* large, + PageAgeHistograms* ages); + + PageID RefillFiller(Length n, bool* from_released) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Allocate the first from p, and contribute the rest to the filler. If + // "donated" is true, the contribution will be marked as coming from the + // tail of a multi-hugepage alloc. Returns the allocated section. + PageID AllocAndContribute(HugePage p, Length n, bool donated) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + // Helpers for New(). + + Span* LockAndAlloc(Length n, bool* from_released); + + Span* AllocSmall(Length n, bool* from_released) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + Span* AllocLarge(Length n, bool* from_released) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + Span* AllocEnormous(Length n, bool* from_released) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + Span* AllocRawHugepages(Length n, bool* from_released) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + bool AddRegion() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void ReleaseHugepage(FillerType::Tracker* pt) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + // Return an allocation from a single hugepage. + void DeleteFromHugepage(FillerType::Tracker* pt, PageID p, Length n) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Finish an allocation request - give it a span and mark it in the pagemap. + Span* Finalize(Length n, PageID page); +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_ diff --git a/tcmalloc/huge_page_filler.h b/tcmalloc/huge_page_filler.h new file mode 100644 index 000000000..3cb29b90d --- /dev/null +++ b/tcmalloc/huge_page_filler.h @@ -0,0 +1,945 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_HUGE_PAGE_FILLER_H_ +#define TCMALLOC_HUGE_PAGE_FILLER_H_ + +#include +#include + +#include +#include + +#include "absl/base/internal/cycleclock.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/range_tracker.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +// PageTracker keeps track of the allocation status of every page in a HugePage. +// It allows allocation and deallocation of a contiguous run of pages. +// +// Its mutating methods are annotated as requiring the pageheap_lock, in order +// to support unlocking the page heap lock in a dynamic annotation-friendly way. +template +class PageTracker : public TList>::Elem { + public: + constexpr PageTracker(HugePage p, int64_t when) + : location_(p), + free_{}, + when_(when), + released_(false), + donated_(false), + releasing_(0) {} + + // REQUIRES: there's a free range of at least n pages + PageID Get(Length n) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // REQUIRES: p was the result of a previous call to Get(n) + void Put(PageID p, Length n) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Are unused pages returned-to-system? + bool released() const { return released_; } + // Was this tracker donated from the tail of a multi-hugepage allocation? + // Only up-to-date when the tracker is on a TrackerList in the Filler; + // otherwise the value is meaningless. + bool donated() const { return donated_; } + // Set/reset the donated flag. The donated status is lost, for instance, + // when further allocations are made on the tracker. + void set_donated(bool status) { donated_ = status; } + + // These statistics help us measure the fragmentation of a hugepage and + // the desirability of allocating from this hugepage. + Length longest_free_range() const { return free_.longest_free(); } + size_t nallocs() const { return free_.allocs(); } + Length used_pages() const { return free_.used(); } + Length free_pages() const; + bool empty() const; + bool full() const; + + // Returns the hugepage whose availability is being tracked. + HugePage location() const { return location_; } + + // Return all unused pages to the system, mark future frees to do same. + // Returns the count of pages unbacked. + size_t ReleaseFree() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Return this allocation to the system, if policy warrants it. + // + // Our policy is: Once we break a hugepage by returning a fraction of it, we + // return *anything* unused. This simplifies tracking. + void MaybeRelease(PageID p, Length n) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + if (!released_) { + return; + } + + releasing_ += n; + ReleasePagesWithoutLock(p, n); + releasing_ -= n; + } + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + private: + HugePage location_; + RangeTracker free_; + // TODO(b/134691947): optimize computing this; it's on the fast path. + int64_t when_; + static_assert(kPagesPerHugePage <= std::numeric_limits::max(), + "nallocs must be able to support kPagesPerHugePage!"); + bool released_; + bool donated_; + // releasing_ needs to be a Length, since we may have up to + // kPagesPerHugePage-1 parallel subreleases in-flight. When they complete, we + // need to have enough information to determine whether or not any remain + // in-flight. This is stored as a length, rather than a bitmap of pages. 
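To make the in-flight accounting above concrete with hypothetical numbers: if two threads are simultaneously inside MaybeRelease for runs of 3 and 5 pages (the lock is dropped while unbacking), releasing_ reads 8; each thread subtracts its own length when its unback finishes, so releasing_ returns to 0 only once no subrelease remains in flight, which is the condition Get() checks before it recomputes released_.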
+ Length releasing_ GUARDED_BY(pageheap_lock); + + void ReleasePages(PageID p, Length n) { + void *ptr = reinterpret_cast(p << kPageShift); + size_t byte_len = n << kPageShift; + Unback(ptr, byte_len); + } + + void ReleasePagesWithoutLock(PageID p, Length n) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + pageheap_lock.Unlock(); + + void *ptr = reinterpret_cast(p << kPageShift); + size_t byte_len = n << kPageShift; + Unback(ptr, byte_len); + + pageheap_lock.Lock(); + } +}; + +// This tracks a set of unfilled hugepages, and fulfills allocations +// with a goal of filling some hugepages as tightly as possible and emptying +// out the remainder. +template +class HugePageFiller { + public: + HugePageFiller(); + + typedef TrackerType Tracker; + + // Our API is simple, but note that it does not include an + // unconditional allocation, only a "try"; we expect callers to + // allocate new hugepages if needed. This simplifies using it in a + // few different contexts (and improves the testing story - no + // dependencies.) + bool TryGet(Length n, TrackerType **hugepage, PageID *p) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Marks [p, p + n) as usable by new allocations into *pt; returns pt + // if that hugepage is now empty (nullptr otherwise.) + // REQUIRES: pt is owned by this object (has been Contribute()), and + // {pt, p, n} was the result of a previous TryGet. + TrackerType *Put(TrackerType *pt, PageID p, Length n) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Contributes a tracker to the filler. If "donated," then the tracker is + // marked as having come from the tail of a multi-hugepage allocation, which + // causes it to be treated slightly differently. + void Contribute(TrackerType *pt, bool donated); + + HugeLength size() const { return size_; } + + // Useful statistics + Length pages_allocated() const { return allocated_; } + Length used_pages() const { return allocated_; } + Length unmapped_pages() const { return unmapped_; } + Length free_pages() const; + + // Fraction of used pages that are on non-released hugepages and + // thus could be backed by kernel hugepages. (Of course, we can't + // guarantee that the kernel had available 2-mib regions of physical + // memory--so this being 1 doesn't mean that everything actually + // *is* hugepage-backed!) + double hugepage_frac() const; + + // Find the emptiest possible hugepage and release its free memory + // to the system. Return the number of pages released. + // Currently our implementation doesn't really use this (no need!) + Length ReleasePages() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + BackingStats stats() const; + void Print(TCMalloc_Printer *out, bool everything) const; + void PrintInPbtxt(PbtxtRegion *hpaa, uint64_t filler_usage_used) const; + + private: + typedef TList TrackerList; + + // This class wraps an array of N TrackerLists and a Bitmap storing which + // elements are non-empty. + template + class HintedTrackerLists { + public: + HintedTrackerLists() : nonempty_{} { + for (int i = 0; i < N; ++i) { + lists_[i].Init(); + } + } + // Removes a TrackerType from the first non-empty freelist with index at + // least n and returns it. Returns nullptr if there is none. 
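The lookup GetLeast performs below can be pictured with a rough standalone analogue (this uses std::bitset purely for illustration rather than the real bitmap of non-empty lists, whose FindSet likewise returns N when nothing qualifies): find the first non-empty list at index n or above, or report that none exists.

#include <bitset>
#include <cstddef>
#include <cstdio>

// Analogue of "first set bit at or above n".
template <size_t N>
size_t FindSetAtOrAbove(const std::bitset<N>& nonempty, size_t n) {
  for (size_t i = n; i < N; ++i) {
    if (nonempty[i]) return i;
  }
  return N;
}

int main() {
  std::bitset<8> nonempty;
  nonempty.set(2);
  nonempty.set(5);
  std::printf("%zu\n", FindSetAtOrAbove(nonempty, 3));  // 5: first non-empty list >= 3
  std::printf("%zu\n", FindSetAtOrAbove(nonempty, 6));  // 8: nothing at or above 6
  return 0;
}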
+ TrackerType *GetLeast(const size_t n) { + ASSERT(n < N); + size_t i = nonempty_.FindSet(n); + if (i == N) { + return nullptr; + } + ASSERT(!lists_[i].empty()); + TrackerType *pt = lists_[i].first(); + CHECK_CONDITION(pt != nullptr); + if (lists_[i].remove(pt)) { + nonempty_.ClearBit(i); + } + return pt; + } + void Add(TrackerType *pt, const size_t i) { + ASSERT(i < N); + ASSERT(pt != nullptr); + lists_[i].prepend(pt); + nonempty_.SetBit(i); + } + void Remove(TrackerType *pt, const size_t i) { + ASSERT(i < N); + ASSERT(pt != nullptr); + if (lists_[i].remove(pt)) { + nonempty_.ClearBit(i); + } + } + TrackerList &operator[](const size_t n) { + ASSERT(n < N); + return lists_[n]; + } + const TrackerList &operator[](const size_t n) const { + ASSERT(n < N); + return lists_[n]; + } + // Runs a functor on all HugePages in the TrackerLists. + // This method is const but the Functor gets passed a non-const pointer. + // This quirk is inherited from TrackerList. + template + void Iter(const Functor &func, size_t start) const { + size_t i = nonempty_.FindSet(start); + while (i < N) { + auto &list = lists_[i]; + ASSERT(!list.empty()); + for (TrackerType *pt : list) { + func(pt); + } + i++; + if (i < N) i = nonempty_.FindSet(i); + } + } + + private: + TrackerList lists_[N]; + Bitmap nonempty_; + }; + + // We group hugepages first by longest-free (as a measure of fragmentation), + // then into 8 chunks inside there by desirability of allocation. + static constexpr size_t kChunks = 8; + // Which chunk should this hugepage be in? + // This returns the largest possible value kChunks-1 iff pt has a single + // allocation. + size_t IndexFor(TrackerType *pt); + // Returns index for regular_alloc_. + static size_t ListFor(size_t longest, size_t chunk); + static constexpr size_t kNumLists = kPagesPerHugePage * kChunks; + + HintedTrackerLists regular_alloc_; + HintedTrackerLists donated_alloc_; + // Partially released ones that we are trying to release. + HugeLength n_released_; + HintedTrackerLists regular_alloc_released_; + + // Remove pt from the appropriate HintedTrackerList. + void Remove(TrackerType *pt); + // Put pt in the appropriate HintedTrackerList. + void Place(TrackerType *pt); + // Like Place(), but for use when donating from the tail of a multi-hugepage + // allocation. + void Donate(TrackerType *pt); + + HugeLength size_; + + Length allocated_; + Length unmapped_; + + // How much have we eagerly unmapped (in already released hugepages), but + // not reported to ReleasePages calls? + Length unmapping_unaccounted_{0}; +}; + +template +inline PageID PageTracker::Get(Length n) { + size_t index = free_.FindAndMark(n); + // If we are now using the entire capacity of the huge page and do not have a + // partial release in-flight (releasing_ > 0), then it is no longer partially + // released to the OS. 
+ if (releasing_ == 0) { + released_ = released_ && !full(); + } + return location_.first_page() + index; +} + +template +inline void PageTracker::Put(PageID p, Length n) { + size_t index = p - location_.first_page(); + const Length before = free_.total_free(); + free_.Unmark(index, n); + + when_ = static_cast( + (static_cast(before) * when_ + + static_cast(n) * absl::base_internal::CycleClock::Now()) / + (before + n)); +} + +template +inline size_t PageTracker::ReleaseFree() { + released_ = true; + size_t count = 0; + size_t index = 0; + size_t n; + while (free_.NextFreeRange(index, &index, &n)) { + PageID p = location_.first_page() + index; + ReleasePages(p, n); + count += n; + index += n; + } + when_ = absl::base_internal::CycleClock::Now(); + return count; +} + +template +inline void PageTracker::AddSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) const { + size_t index = 0, n; + + int64_t w = when_; + while (free_.NextFreeRange(index, &index, &n)) { + if (n < kMaxPages) { + if (small != nullptr) { + if (released()) { + small->returned_length[n]++; + } else { + small->normal_length[n]++; + } + } + } else { + if (large != nullptr) { + large->spans++; + if (released()) { + large->returned_pages += n; + } else { + large->normal_pages += n; + } + } + } + + if (ages) { + ages->RecordRange(n, released(), w); + } + index += n; + } +} + +template +inline bool PageTracker::empty() const { + return free_.used() == 0; +} + +template +inline bool PageTracker::full() const { + return free_.used() == free_.size(); +} + +template +inline Length PageTracker::free_pages() const { + return kPagesPerHugePage - used_pages(); +} + +template +inline HugePageFiller::HugePageFiller() + : n_released_(NHugePages(0)), + size_(NHugePages(0)), + allocated_(0), + unmapped_(0) {} + +template +inline bool HugePageFiller::TryGet(Length n, + TrackerType **hugepage, + PageID *p) { + // How do we choose which hugepage to allocate from (among those with + // a free range of at least n?) Our goal is to be as space-efficient + // as possible, which leads to two priorities: + // + // (1) avoid fragmentation; keep free ranges in a hugepage as long + // as possible. This maintains our ability to satisfy large + // requests without allocating new hugepages + // (2) fill mostly-full hugepages more; let mostly-empty hugepages + // empty out. This lets us recover totally empty hugepages (and + // return them to the OS.) + // + // In practice, avoiding fragmentation is by far more important: + // space usage can explode if we don't jealously guard large free ranges. + // + // Our primary measure of fragmentation of a hugepage by a proxy measure: the + // longest free range it contains. If this is short, any free space is + // probably fairly fragmented. It also allows us to instantly know if a + // hugepage can support a given allocation. + // + // We quantize the number of allocations in a hugepage (chunked + // logarithmically.) We favor allocating from hugepages with many allocations + // already present, which helps with (2) above. Note that using the number of + // allocations works substantially better than the number of allocated pages; + // to first order allocations of any size are about as likely to be freed, and + // so (by simple binomial probability distributions) we're more likely to + // empty out a hugepage with 2 5-page allocations than one with 5 1-pages. 
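A hypothetical worked example of the binomial argument above: if each live allocation were freed independently with probability p over some window (a toy model, not a measured property of real workloads), a hugepage holding two allocations becomes completely free with probability p^2, while one holding five needs p^5; at p = 0.5 that is 0.25 versus about 0.03. This is why the filler prefers to place new allocations on hugepages that already hold many: the nearly-empty ones are the ones most likely to drain completely and be returned.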
+ // + // The above suggests using the hugepage with the shortest longest empty + // range, breaking ties in favor of fewest number of allocations. This works + // well for most workloads but caused bad page heap fragmentation for some: + // b/63301358 and b/138618726. The intuition for what went wrong is + // that although the tail of large allocations is donated to the Filler (see + // HugePageAwareAllocator::AllocRawHugepages) for use, we don't actually + // want to use them until the regular Filler hugepages are used up. That + // way, they can be reassembled as a single large hugepage range if the + // large allocation is freed. + // Some workloads can tickle this discrepancy a lot, because they have a lot + // of large, medium-lifetime allocations. To fix this we treat hugepages + // that are freshly donated as less preferable than hugepages that have been + // already used for small allocations, regardless of their longest_free_range. + // + // Overall our allocation preference is: + // - We prefer allocating from used freelists rather than freshly donated + // - Among donated freelists we prefer smaller longest_free_range + // - Among used freelists we prefer smaller longest_free_range + // with ties broken by (quantized) alloc counts + // + // We group hugepages by longest_free_range and quantized alloc count and + // store each group in a TrackerList. All freshly-donated groups are stored + // in a "donated" array and the groups with (possibly prior) small allocs are + // stored in a "regular" array. Each of these arrays is encapsulated in a + // HintedTrackerLists object, which stores the array together with a bitmap to + // quickly find non-empty lists. The lists are ordered to satisfy the + // following two useful properties: + // + // - later (nonempty) freelists can always fulfill requests that + // earlier ones could. + // - earlier freelists, by the above criteria, are preferred targets + // for allocation. + // + // So all we have to do is find the first nonempty freelist in the regular + // HintedTrackerList that *could* support our allocation, and it will be our + // best choice. If there is none we repeat with the donated HintedTrackerList. + if (n >= kPagesPerHugePage) return false; + TrackerType *pt; + + bool was_released = false; + do { + pt = regular_alloc_.GetLeast(ListFor(n, 0)); + if (pt) { + ASSERT(!pt->donated()); + break; + } + pt = donated_alloc_.GetLeast(n); + if (pt) { + break; + } + pt = regular_alloc_released_.GetLeast(ListFor(n, 0)); + if (pt) { + ASSERT(!pt->donated()); + was_released = true; + --n_released_; + break; + } + + return false; + } while (false); + ASSERT(pt->longest_free_range() >= n); + *hugepage = pt; + *p = pt->Get(n); + Place(pt); + allocated_ += n; + if (was_released) { + ASSERT(unmapped_ >= n); + unmapped_ -= n; + } + // We're being used for an allocation, so we are no longer considered + // donated by this point. + ASSERT(!pt->donated()); + return true; +} + +// Marks [p, p + n) as usable by new allocations into *pt; returns pt +// if that hugepage is now empty (nullptr otherwise.) +// REQUIRES: pt is owned by this object (has been Contribute()), and +// {pt, p, n} was the result of a previous TryGet. +template +inline TrackerType *HugePageFiller::Put(TrackerType *pt, PageID p, + Length n) { + // Consider releasing [p, p+n). We do this here: + // * To unback the memory before we mark it as free. When partially + // unbacking, we release the pageheap_lock. 
Another thread could see the + // "free" memory and begin using it before we retake the lock. + // * To maintain maintain the invariant that + // pt->released() => n_released_ > 0 + // which is used for keeping our released hugepage stats in-sync with the + // cached n_released_ value: + // n_released_ = regular_alloc_released_.size() + // size() doesn't exist (it'd be O(n) while holding the pageheap_lock). + // We do this before removing pt from our lists, since another thread may + // encounter our post-Remove() update to n_released_ while encountering pt. + pt->MaybeRelease(p, n); + + Remove(pt); + + pt->Put(p, n); + + allocated_ -= n; + if (pt->released()) { + unmapped_ += n; + unmapping_unaccounted_ += n; + } + if (pt->longest_free_range() == kPagesPerHugePage) { + --size_; + if (pt->released()) { + unmapped_ -= kPagesPerHugePage; + } + return pt; + } + Place(pt); + return nullptr; +} + +template +inline void HugePageFiller::Contribute(TrackerType *pt, + bool donated) { + allocated_ += pt->used_pages(); + if (donated) { + Donate(pt); + } else { + Place(pt); + } + ++size_; +} + +// Find the emptiest possible hugepage and release its free memory +// to the system. Return the number of pages released. +// Currently our implementation doesn't really use this (no need!) +template +inline Length HugePageFiller::ReleasePages() { + // We also do eager release, once we've called this at least once: + // claim credit for anything that gets done. + if (unmapping_unaccounted_ > 0) { + Length n = unmapping_unaccounted_; + unmapping_unaccounted_ = 0; + return n; + } + TrackerType *best = nullptr; + auto loop = [&](TrackerType *pt) { + if (!best || best->used_pages() > pt->used_pages()) { + best = pt; + } + }; + + // We can skip the first kChunks lists as they are known to be 100% full. + // (Those lists are likely to be long.) + // + // We do not examine the regular_alloc_released_ lists, as deallocating on + // an already released page causes it to fully return everything (see + // PageTracker::Put). + // TODO(b/138864853): Perhaps remove donated_alloc_ from here, it's not a + // great candidate for partial release. + regular_alloc_.Iter(loop, kChunks); + donated_alloc_.Iter(loop, 0); + + if (best && !best->full()) { + Remove(best); + Length ret = best->ReleaseFree(); + unmapped_ += ret; + Place(best); + return ret; + } + return 0; +} + +template +inline void HugePageFiller::AddSpanStats( + SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const { + auto loop = [&](const TrackerType *pt) { + pt->AddSpanStats(small, large, ages); + }; + // We can skip the first kChunks lists as they are known to be 100% full. + regular_alloc_.Iter(loop, kChunks); + donated_alloc_.Iter(loop, 0); + + regular_alloc_released_.Iter(loop, 0); +} + +template +inline BackingStats HugePageFiller::stats() const { + BackingStats s; + s.system_bytes = size_.in_bytes(); + s.free_bytes = free_pages() * kPageSize; + s.unmapped_bytes = unmapped_pages() * kPageSize; + return s; +} + +namespace internal { +// Computes some histograms of fullness. Because nearly empty/full huge pages +// are much more interesting, we calculate 4 buckets at each of the beginning +// and end of size one, and then divide the overall space by 16 to have 16 +// (mostly) even buckets in the middle. 
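To make the bucket layout described above concrete, here is a standalone sketch reproducing the bound computation from the constructor below, under the assumption kPagesPerHugePage == 256 (for instance 2 MiB hugepages of 8 KiB pages; the real constant depends on the build):

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // Assumed value for illustration only.
  const size_t kPagesPerHugePage = 256;
  std::vector<size_t> bounds;
  size_t i;
  for (i = 0; i <= 4 && i < kPagesPerHugePage; ++i) bounds.push_back(i);
  if (i < kPagesPerHugePage - 4) {
    const size_t step = kPagesPerHugePage / 16;
    i = ((i - 1) | (step - 1)) + 1;  // round i up to the next step boundary
    for (; i < kPagesPerHugePage - 4; i += step) bounds.push_back(i);
    i = kPagesPerHugePage - 4;
  }
  for (; i < kPagesPerHugePage; ++i) bounds.push_back(i);
  // Prints 24 bounds: 0 1 2 3 4 16 32 ... 240 252 253 254 255, giving four
  // single-page buckets at each end and 16 (mostly) even buckets in the
  // middle, which matches kBucketCapacity = 4 + 16 + 4.
  for (size_t b : bounds) std::printf("%zu ", b);
  std::printf("\n%zu buckets\n", bounds.size());
  return 0;
}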
+class UsageInfo { + public: + enum Type { kRegular, kDonated, kReleased }; + + UsageInfo() { + size_t i; + for (i = 0; i <= 4 && i < kPagesPerHugePage; ++i) { + bucket_bounds_[buckets_size_] = i; + buckets_size_++; + } + if (i < kPagesPerHugePage - 4) { + // Because kPagesPerHugePage is a power of two, it must be at least 16 + // to get inside this "if" - either i=5 and kPagesPerHugePage=8 and + // the test fails, or kPagesPerHugePage <= 4 and the test fails. + ASSERT(kPagesPerHugePage >= 16); + constexpr int step = kPagesPerHugePage / 16; + // We want to move in "step"-sized increments, aligned every "step". + // So first we have to round i up to the nearest step boundary. This + // logic takes advantage of step being a power of two, so step-1 is + // all ones in the low-order bits. + i = ((i - 1) | (step - 1)) + 1; + for (; i < kPagesPerHugePage - 4; i += step) { + bucket_bounds_[buckets_size_] = i; + buckets_size_++; + } + i = kPagesPerHugePage - 4; + } + for (; i < kPagesPerHugePage; ++i) { + bucket_bounds_[buckets_size_] = i; + buckets_size_++; + } + CHECK_CONDITION(buckets_size_ <= kBucketCapacity); + } + + template + void Record(const TrackerType *pt, Type which) { + const size_t free = kPagesPerHugePage - pt->used_pages(); + const size_t lf = pt->longest_free_range(); + const size_t nalloc = pt->nallocs(); + // This is a little annoying as our buckets *have* to differ; + // nalloc is in [1,256], free_pages and longest_free are in [0, 255]. + free_page_histo_[which][BucketNum(free)]++; + longest_free_histo_[which][BucketNum(lf)]++; + nalloc_histo_[which][BucketNum(nalloc - 1)]++; + } + + void Print(TCMalloc_Printer *out) { + PrintHisto(out, free_page_histo_[kRegular], + "# of regular hps with a<= # of free pages CreateSubRegion("filler_tracker"); + scoped.PrintRaw("type", kTrackerTypes[i]); + PrintHisto(&scoped, free_page_histo_[i], "free_pages_histogram", 0); + PrintHisto(&scoped, longest_free_histo_[i], + "longest_free_range_histogram", 0); + PrintHisto(&scoped, nalloc_histo_[i], "allocations_histogram", 1); + } + } + + private: + // Maximum of 4 buckets at the start and end, and 16 in the middle. + static constexpr size_t kBucketCapacity = 4 + 16 + 4; + using Histo = size_t[kBucketCapacity]; + + int BucketNum(int page) { + auto it = + std::upper_bound(bucket_bounds_, bucket_bounds_ + buckets_size_, page); + CHECK_CONDITION(it != bucket_bounds_); + return it - bucket_bounds_ - 1; + } + + void PrintHisto(TCMalloc_Printer *out, Histo h, const char blurb[], + size_t offset) { + out->printf("\nHugePageFiller: %s", blurb); + for (size_t i = 0; i < buckets_size_; ++i) { + if (i % 6 == 0) { + out->printf("\nHugePageFiller:"); + } + out->printf(" <%3zu<=%6zu", bucket_bounds_[i] + offset, h[i]); + } + out->printf("\n"); + } + + void PrintHisto(PbtxtRegion *hpaa, Histo h, const char key[], size_t offset) { + for (size_t i = 0; i < buckets_size_; ++i) { + auto hist = hpaa->CreateSubRegion(key); + hist.PrintI64("lower_bound", bucket_bounds_[i] + offset); + hist.PrintI64("upper_bound", + (i == buckets_size_ - 1 ? bucket_bounds_[i] + : bucket_bounds_[i + 1] - 1) + + offset); + hist.PrintI64("value", h[i]); + } + } + + // Arrays, because they are split per alloc type. 
+ Histo free_page_histo_[3]{}; + Histo longest_free_histo_[3]{}; + Histo nalloc_histo_[3]{}; + size_t bucket_bounds_[kBucketCapacity]; + int buckets_size_ = 0; +}; +} // namespace internal + +template +inline void HugePageFiller::Print(TCMalloc_Printer *out, + bool everything) const { + out->printf("HugePageFiller: densely pack small requests into hugepages\n"); + + HugeLength nrel = n_released_; + HugeLength nfull = NHugePages(0); + + // note kChunks, not kNumLists here--we're iterating *full* lists. + for (size_t chunk = 0; chunk < kChunks; ++chunk) { + nfull += NHugePages(regular_alloc_[ListFor(/*longest=*/0, chunk)].length()); + } + // A donated alloc full list is impossible because it would have never been + // donated in the first place. (It's an even hugepage.) + ASSERT(donated_alloc_[0].empty()); + // Evaluate a/b, avoiding division by zero + const auto safe_div = [](double a, double b) { return b == 0 ? 0 : a / b; }; + const HugeLength n_nonfull = size() - nrel - nfull; + out->printf( + "HugePageFiller: %zu total, %zu full, %zu partial, %zu released, 0 " + "quarantined\n", + size().raw_num(), nfull.raw_num(), n_nonfull.raw_num(), nrel.raw_num()); + out->printf("HugePageFiller: %zu pages free in %zu hugepages, %.4f free\n", + free_pages(), size().raw_num(), + safe_div(free_pages(), size().in_pages())); + + out->printf("HugePageFiller: among non-fulls, %.4f free\n", + safe_div(free_pages(), n_nonfull.in_pages())); + + out->printf( + "HugePageFiller: %zu hugepages partially released, %.4f released\n", + nrel.raw_num(), safe_div(unmapped_pages(), nrel.in_pages())); + out->printf("HugePageFiller: %.4f of used pages hugepageable\n", + hugepage_frac()); + if (!everything) return; + + // Compute some histograms of fullness. + using ::tcmalloc::internal::UsageInfo; + UsageInfo usage; + regular_alloc_.Iter( + [&](const TrackerType *pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); + donated_alloc_.Iter( + [&](const TrackerType *pt) { usage.Record(pt, UsageInfo::kDonated); }, 0); + regular_alloc_released_.Iter( + [&](const TrackerType *pt) { usage.Record(pt, UsageInfo::kReleased); }, + 0); + + out->printf("\n"); + out->printf("HugePageFiller: fullness histograms\n"); + usage.Print(out); +} + +template +inline void HugePageFiller::PrintInPbtxt( + PbtxtRegion *hpaa, uint64_t filler_usage_used) const { + HugeLength nrel = n_released_; + HugeLength nfull = NHugePages(0); + + // note kChunks, not kNumLists here--we're iterating *full* lists. + for (size_t chunk = 0; chunk < kChunks; ++chunk) { + nfull += NHugePages(regular_alloc_[ListFor(/*longest=*/0, chunk)].length()); + } + // A donated alloc full list is impossible because it would have never been + // donated in the first place. (It's an even hugepage.) + ASSERT(donated_alloc_[0].empty()); + // Evaluate a/b, avoiding division by zero + const auto safe_div = [](double a, double b) { return b == 0 ? 0 : a / b; }; + const HugeLength n_nonfull = size() - nrel - nfull; + hpaa->PrintI64("filler_full_huge_pages", nfull.raw_num()); + hpaa->PrintI64("filler_partial_huge_pages", n_nonfull.raw_num()); + hpaa->PrintI64("filler_released_huge_pages", nrel.raw_num()); + hpaa->PrintI64("filler_free_pages", free_pages()); + hpaa->PrintI64( + "filler_unmapped_bytes", + static_cast(nrel.raw_num() * + safe_div(unmapped_pages(), nrel.in_pages()))); + hpaa->PrintI64("filler_hugepageable_used_bytes", + static_cast(hugepage_frac() * + static_cast(filler_usage_used))); + + // Compute some histograms of fullness. 
+ using ::tcmalloc::internal::UsageInfo; + UsageInfo usage; + regular_alloc_.Iter( + [&](const TrackerType *pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); + donated_alloc_.Iter( + [&](const TrackerType *pt) { usage.Record(pt, UsageInfo::kDonated); }, 0); + regular_alloc_released_.Iter( + [&](const TrackerType *pt) { usage.Record(pt, UsageInfo::kReleased); }, + 0); + + usage.Print(hpaa); +} + +template +inline size_t HugePageFiller::IndexFor(TrackerType *pt) { + ASSERT(!pt->empty()); + // Prefer to allocate from hugepages with many allocations already present; + // spaced logarithmically. + const size_t na = pt->nallocs(); + // This equals 63 - ceil(log2(na)) + // (or 31 if size_t is 4 bytes, etc.) + const size_t neg_ceil_log = __builtin_clzl(2 * na - 1); + + // We want the same spread as neg_ceil_log, but spread over [0, + // kChunks) (clamped at the left edge) instead of [0, 64). So subtract off + // the difference (computed by forcing na=1 to kChunks - 1.) + const size_t kOffset = __builtin_clzl(1) - (kChunks - 1); + const size_t i = std::max(neg_ceil_log, kOffset) - kOffset; + ASSERT(i < kChunks); + return i; +} + +template +inline size_t HugePageFiller::ListFor(const size_t longest, + const size_t chunk) { + ASSERT(chunk < kChunks); + ASSERT(longest < kPagesPerHugePage); + return longest * kChunks + chunk; +} + +template +inline void HugePageFiller::Remove(TrackerType *pt) { + size_t longest = pt->longest_free_range(); + ASSERT(longest < kPagesPerHugePage); + + if (pt->released()) { + --n_released_; + } + + if (pt->donated()) { + donated_alloc_.Remove(pt, longest); + } else { + size_t chunk = IndexFor(pt); + auto &list = pt->released() ? regular_alloc_released_ : regular_alloc_; + size_t i = ListFor(longest, chunk); + list.Remove(pt, i); + } +} + +template +inline void HugePageFiller::Place(TrackerType *pt) { + size_t chunk = IndexFor(pt); + size_t longest = pt->longest_free_range(); + ASSERT(longest < kPagesPerHugePage); + + if (pt->released()) { + ++n_released_; + } + // Once a donated alloc is used in any way, it degenerates into being a + // regular alloc. This allows the algorithm to keep using it (we had to be + // desperate to use it in the first place), and thus preserves the other + // donated allocs. + pt->set_donated(false); + + auto *list = pt->released() ? ®ular_alloc_released_ : ®ular_alloc_; + size_t i = ListFor(longest, chunk); + list->Add(pt, i); +} + +template +inline void HugePageFiller::Donate(TrackerType *pt) { + size_t longest = pt->longest_free_range(); + ASSERT(longest < kPagesPerHugePage); + + // We should never be donating already-released trackers! + ASSERT(!pt->released()); + pt->set_donated(true); + + donated_alloc_.Add(pt, longest); +} + +template +inline double HugePageFiller::hugepage_frac() const { + // How many of our used pages are on non-huge pages? Since + // everything on a released hugepage is either used or released, + // just the difference: + const Length nrel = n_released_.in_pages(); + const Length used = used_pages(); + const Length unmapped = unmapped_pages(); + const Length used_on_rel = nrel >= unmapped ? nrel - unmapped : 0; + ASSERT(used >= used_on_rel); + const Length used_on_huge = used - used_on_rel; + + const Length denom = used > 0 ? used : 1; + const double ret = static_cast(used_on_huge) / denom; + ASSERT(ret >= 0); + ASSERT(ret <= 1); + // TODO(b/117611602): Replace this with absl::clamp when that is + // open-sourced. + return (ret < 0) ? 0 : (ret > 1) ? 1 : ret; +} + +// Helper for stat functions. 
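+// free_pages() counts pages that are backed but not allocated, i.e. total
+// pages minus used minus unmapped. For example (assuming
+// kPagesPerHugePage == 256), a filler holding 4 hugepages with 700 pages
+// used and 200 pages unmapped has 1024 - 700 - 200 == 124 pages free.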
+template +inline Length HugePageFiller::free_pages() const { + return size().in_pages() - used_pages() - unmapped_pages(); +} + +} // namespace tcmalloc + +#endif // TCMALLOC_HUGE_PAGE_FILLER_H_ diff --git a/tcmalloc/huge_page_filler_test.cc b/tcmalloc/huge_page_filler_test.cc new file mode 100644 index 000000000..8bcaf526c --- /dev/null +++ b/tcmalloc/huge_page_filler_test.cc @@ -0,0 +1,2236 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_page_filler.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/algorithm/container.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/flags/flag.h" +#include "absl/memory/memory.h" +#include "absl/random/bernoulli_distribution.h" +#include "absl/random/random.h" +#include "absl/synchronization/blocking_counter.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +ABSL_FLAG(uint64_t, page_tracker_defrag_lim, 32, + "Max allocation size for defrag test"); + +ABSL_FLAG(uint64_t, frag_req_limit, 32, "request size limit for frag test"); +ABSL_FLAG(uint64_t, frag_size, 512 * 1024, + "target number of pages for frag test"); +ABSL_FLAG(uint64_t, frag_iters, 10 * 1000 * 1000, "iterations for frag test"); + +ABSL_FLAG(double, release_until, 0.01, + "fraction of used we target in pageheap"); +ABSL_FLAG(uint64_t, bytes, 1024 * 1024 * 1024, "baseline usage"); +ABSL_FLAG(double, growth_factor, 2.0, "growth over baseline"); + +namespace tcmalloc { +namespace { + +// This is an arbitrary distribution taken from page requests from +// an empirical driver test. It seems realistic enough. We trim it to +// [1, last]. 
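+// For example, EmpiricalDistribution(8) draws only lengths in [1, 8], with
+// relative weights ret[1] .. ret[8] taken from the table below.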
+// +std::discrete_distribution EmpiricalDistribution(Length last) { + std::vector page_counts = []() { + std::vector ret(12289); + ret[1] = 375745576; + ret[2] = 59737961; + ret[3] = 35549390; + ret[4] = 43896034; + ret[5] = 17484968; + ret[6] = 15830888; + ret[7] = 9021717; + ret[8] = 208779231; + ret[9] = 3775073; + ret[10] = 25591620; + ret[11] = 2483221; + ret[12] = 3595343; + ret[13] = 2232402; + ret[16] = 17639345; + ret[21] = 4215603; + ret[25] = 4212756; + ret[28] = 760576; + ret[30] = 2166232; + ret[32] = 3021000; + ret[40] = 1186302; + ret[44] = 479142; + ret[48] = 570030; + ret[49] = 101262; + ret[55] = 592333; + ret[57] = 236637; + ret[64] = 785066; + ret[65] = 44700; + ret[73] = 539659; + ret[80] = 342091; + ret[96] = 488829; + ret[97] = 504; + ret[113] = 242921; + ret[128] = 157206; + ret[129] = 145; + ret[145] = 117191; + ret[160] = 91818; + ret[192] = 67824; + ret[193] = 144; + ret[225] = 40711; + ret[256] = 38569; + ret[257] = 1; + ret[297] = 21738; + ret[320] = 13510; + ret[384] = 19499; + ret[432] = 13856; + ret[490] = 9849; + ret[512] = 3024; + ret[640] = 3655; + ret[666] = 3963; + ret[715] = 2376; + ret[768] = 288; + ret[1009] = 6389; + ret[1023] = 2788; + ret[1024] = 144; + ret[1280] = 1656; + ret[1335] = 2592; + ret[1360] = 3024; + ret[1536] = 432; + ret[2048] = 288; + ret[2560] = 72; + ret[3072] = 360; + ret[12288] = 216; + return ret; + }(); + + Length lim = last; + auto i = page_counts.begin(); + // remember lim might be too big (in which case we use the whole + // vector...) + + auto j = page_counts.size() > lim ? i + (lim + 1) : page_counts.end(); + + return std::discrete_distribution(i, j); +} + +class PageTrackerTest : public testing::Test { + protected: + PageTrackerTest() + : // an unlikely magic page + huge_(HugePageContaining(reinterpret_cast(0x1abcde200000))), + tracker_(huge_, absl::base_internal::CycleClock::Now()) {} + + ~PageTrackerTest() override { mock_.VerifyAndClear(); } + + struct PAlloc { + PageID p; + Length n; + }; + + void Mark(PAlloc a, size_t mark) { + EXPECT_LE(huge_.first_page(), a.p); + size_t index = a.p - huge_.first_page(); + size_t end = index + a.n; + EXPECT_LE(end, kPagesPerHugePage); + for (; index < end; ++index) { + marks_[index] = mark; + } + } + + class MockUnbackInterface { + public: + void Unback(void *p, size_t len) { + CHECK_CONDITION(actual_index_ < kMaxCalls); + actual_[actual_index_] = {p, len}; + ++actual_index_; + } + + void Expect(void *p, size_t len) { + CHECK_CONDITION(expected_index_ < kMaxCalls); + expected_[expected_index_] = {p, len}; + ++expected_index_; + } + + void VerifyAndClear() { + EXPECT_EQ(expected_index_, actual_index_); + for (size_t i = 0; i < expected_index_; ++i) { + EXPECT_EQ(expected_[i].ptr, actual_[i].ptr); + EXPECT_EQ(expected_[i].len, actual_[i].len); + } + memset(expected_, 0, sizeof(expected_)); + memset(actual_, 0, sizeof(actual_)); + expected_index_ = 0; + actual_index_ = 0; + } + + private: + struct CallArgs { + void *ptr{nullptr}; + size_t len{0}; + }; + + static constexpr size_t kMaxCalls = 10; + CallArgs expected_[kMaxCalls] = {}; + CallArgs actual_[kMaxCalls] = {}; + size_t expected_index_{0}; + size_t actual_index_{0}; + }; + + static void MockUnback(void *p, size_t len); + + typedef PageTracker TestPageTracker; + + // strict because release calls should only happen when we ask + static MockUnbackInterface mock_; + + void Check(PAlloc a, size_t mark) { + EXPECT_LE(huge_.first_page(), a.p); + size_t index = a.p - huge_.first_page(); + size_t end = index + a.n; + EXPECT_LE(end, 
kPagesPerHugePage); + for (; index < end; ++index) { + EXPECT_EQ(mark, marks_[index]); + } + } + size_t marks_[kPagesPerHugePage]; + HugePage huge_; + TestPageTracker tracker_; + + void ExpectPages(PAlloc a) { + void *ptr = reinterpret_cast(a.p << kPageShift); + size_t bytes = a.n << kPageShift; + mock_.Expect(ptr, bytes); + } + + PAlloc Get(Length n) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + PageID p = tracker_.Get(n); + return {p, n}; + } + + void Put(PAlloc a) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + tracker_.Put(a.p, a.n); + } + + size_t ReleaseFree() { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + return tracker_.ReleaseFree(); + } + + void MaybeRelease(PAlloc a) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + tracker_.MaybeRelease(a.p, a.n); + } +}; + +void PageTrackerTest::MockUnback(void *p, size_t len) { mock_.Unback(p, len); } + +PageTrackerTest::MockUnbackInterface PageTrackerTest::mock_; + +TEST_F(PageTrackerTest, AllocSane) { + Length free = kPagesPerHugePage; + Length n = 1; + std::vector allocs; + // This should work without fragmentation. + while (n <= free) { + ASSERT_LE(n, tracker_.longest_free_range()); + EXPECT_EQ(kPagesPerHugePage - free, tracker_.used_pages()); + EXPECT_EQ(free, tracker_.free_pages()); + PAlloc a = Get(n); + Mark(a, n); + allocs.push_back(a); + free -= n; + n++; + } + + // All should be distinct + for (auto alloc : allocs) { + Check(alloc, alloc.n); + } +} + +TEST_F(PageTrackerTest, Releasing) { + static const size_t kAllocSize = kPagesPerHugePage / 4; + PAlloc a1 = Get(kAllocSize - 3); + PAlloc a2 = Get(kAllocSize); + PAlloc a3 = Get(kAllocSize + 1); + PAlloc a4 = Get(kAllocSize + 2); + + Put(a2); + Put(a4); + // We now have a hugepage that looks like [alloced] [free] [alloced] [free]. + // The free parts should be released when we mark the hugepage as such, + // but not the allocated parts. + ExpectPages(a2); + ExpectPages(a4); + ReleaseFree(); + mock_.VerifyAndClear(); + + // Now we return the other parts, and they *should* get released. + ExpectPages(a1); + ExpectPages(a3); + + MaybeRelease(a1); + Put(a1); + + MaybeRelease(a3); + Put(a3); +} + +TEST_F(PageTrackerTest, Defrag) { + absl::BitGen rng; + const Length N = absl::GetFlag(FLAGS_page_tracker_defrag_lim); + auto dist = EmpiricalDistribution(N); + + std::vector allocs; + + std::vector doomed; + while (tracker_.longest_free_range() > 0) { + Length n; + do { + n = dist(rng); + } while (n > tracker_.longest_free_range()); + PAlloc a = Get(n); + (absl::Bernoulli(rng, 1.0 / 2) ? allocs : doomed).push_back(a); + } + + for (auto d : doomed) { + Put(d); + } + + static const size_t kReps = 250 * 1000; + + std::vector frag_samples; + std::vector longest_free_samples; + frag_samples.reserve(kReps); + longest_free_samples.reserve(kReps); + for (size_t i = 0; i < kReps; ++i) { + const Length free = kPagesPerHugePage - tracker_.used_pages(); + // Ideally, we'd like all of our free space to stay in a single + // nice little run. + const Length longest = tracker_.longest_free_range(); + double frag = free > 0 ? static_cast(longest) / free : 1; + + if (i % (kReps / 25) == 0) { + printf("free = %zu longest = %zu frag = %f\n", free, longest, frag); + } + frag_samples.push_back(frag); + longest_free_samples.push_back(longest); + + // Randomly grow or shrink (picking the only safe option when we're either + // full or empty.) 
+ if (tracker_.longest_free_range() == 0 || + (absl::Bernoulli(rng, 1.0 / 2) && !allocs.empty())) { + size_t index = absl::Uniform(rng, 0, allocs.size()); + std::swap(allocs[index], allocs.back()); + Put(allocs.back()); + allocs.pop_back(); + } else { + Length n; + do { + n = dist(rng); + } while (n > tracker_.longest_free_range()); + allocs.push_back(Get(n)); + } + } + + std::sort(frag_samples.begin(), frag_samples.end()); + std::sort(longest_free_samples.begin(), longest_free_samples.end()); + + { + const double p10 = frag_samples[kReps * 10 / 100]; + const double p25 = frag_samples[kReps * 25 / 100]; + const double p50 = frag_samples[kReps * 50 / 100]; + const double p75 = frag_samples[kReps * 75 / 100]; + const double p90 = frag_samples[kReps * 90 / 100]; + printf("Fragmentation quantiles:\n"); + printf("p10: %f p25: %f p50: %f p75: %f p90: %f\n", p10, p25, p50, p75, + p90); + // We'd like to prety consistently rely on (75% of the time) reasonable + // defragmentation (50% of space is fully usable...) + // ...but we currently can't hit that mark consistently. + // The situation is worse on ppc with larger huge pages: + // pass rate for test is ~50% at 0.20. Reducing from 0.2 to 0.07. + // TODO(b/127466107) figure out a better solution. + EXPECT_GE(p25, 0.07); + } + + { + const Length p10 = longest_free_samples[kReps * 10 / 100]; + const Length p25 = longest_free_samples[kReps * 25 / 100]; + const Length p50 = longest_free_samples[kReps * 50 / 100]; + const Length p75 = longest_free_samples[kReps * 75 / 100]; + const Length p90 = longest_free_samples[kReps * 90 / 100]; + printf("Longest free quantiles:\n"); + printf("p10: %zu p25: %zu p50: %zu p75: %zu p90: %zu\n", p10, p25, p50, p75, + p90); + // Similarly, we'd really like for there usually (p25) to be a space + // for a large allocation (N - note that we've cooked the books so that + // the page tracker is going to be something like half empty (ish) and N + // is small, so that should be doable.) + // ...but, of course, it isn't. 
+ EXPECT_GE(p25, 4); + } + + for (auto a : allocs) { + Put(a); + } +} + +TEST_F(PageTrackerTest, Stats) { + struct Helper { + static void Stat(const TestPageTracker &tracker, + std::vector *small_backed, + std::vector *small_unbacked, LargeSpanStats *large, + double *avg_age_backed, double *avg_age_unbacked) { + SmallSpanStats small; + memset(&small, 0, sizeof(small)); + memset(large, 0, sizeof(*large)); + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + tracker.AddSpanStats(&small, large, &ages); + small_backed->clear(); + small_unbacked->clear(); + for (int i = 0; i < kMaxPages; ++i) { + for (int j = 0; j < small.normal_length[i]; ++j) { + small_backed->push_back(i); + } + + for (int j = 0; j < small.returned_length[i]; ++j) { + small_unbacked->push_back(i); + } + } + + *avg_age_backed = ages.GetTotalHistogram(false)->avg_age(); + *avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age(); + } + }; + + LargeSpanStats large; + std::vector small_backed, small_unbacked; + double avg_age_backed, avg_age_unbacked; + + const PageID p = Get(kPagesPerHugePage).p; + const PageID end = p + kPagesPerHugePage; + PageID next = p; + Put({next, kMaxPages + 1}); + next += kMaxPages + 1; + + absl::SleepFor(absl::Milliseconds(10)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + 1, large.normal_pages); + EXPECT_EQ(0, large.returned_pages); + EXPECT_LE(0.01, avg_age_backed); + + next++; + Put({next, 1}); + next += 1; + absl::SleepFor(absl::Milliseconds(20)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(1)); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + 1, large.normal_pages); + EXPECT_EQ(0, large.returned_pages); + EXPECT_LE(((kMaxPages + 1) * 0.03 + 1 * 0.02) / (kMaxPages + 2), + avg_age_backed); + EXPECT_EQ(0, avg_age_unbacked); + + next++; + Put({next, 2}); + next += 2; + absl::SleepFor(absl::Milliseconds(30)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(1, 2)); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + 1, large.normal_pages); + EXPECT_EQ(0, large.returned_pages); + EXPECT_LE(((kMaxPages + 1) * 0.06 + 1 * 0.05 + 2 * 0.03) / (kMaxPages + 4), + avg_age_backed); + EXPECT_EQ(0, avg_age_unbacked); + + next++; + Put({next, 3}); + next += 3; + ASSERT_LE(next, end); + absl::SleepFor(absl::Milliseconds(40)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(1, 2, 3)); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + 1, large.normal_pages); + EXPECT_EQ(0, large.returned_pages); + EXPECT_LE(((kMaxPages + 1) * 0.10 + 1 * 0.09 + 2 * 0.07 + 3 * 0.04) / + (kMaxPages + 7), + avg_age_backed); + EXPECT_EQ(0, avg_age_unbacked); + + ExpectPages({p, kMaxPages + 1}); + ExpectPages({p + kMaxPages + 2, 1}); + ExpectPages({p + kMaxPages + 4, 2}); + ExpectPages({p + kMaxPages + 7, 3}); + EXPECT_EQ(kMaxPages + 7, ReleaseFree()); + absl::SleepFor(absl::Milliseconds(100)); + Helper::Stat(tracker_, 
&small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, testing::ElementsAre(1, 2, 3)); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(0, large.normal_pages); + EXPECT_EQ(kMaxPages + 1, large.returned_pages); + EXPECT_EQ(0, avg_age_backed); + EXPECT_LE(0.1, avg_age_unbacked); +} + +class BlockingUnback { + public: + static void Unback(void *p, size_t len) { + if (!mu_) { + return; + } + + if (counter) { + counter->DecrementCount(); + } + + mu_->Lock(); + mu_->Unlock(); + } + + static void set_lock(absl::Mutex *mu) { mu_ = mu; } + + static absl::BlockingCounter *counter; + + private: + static thread_local absl::Mutex *mu_; +}; + +thread_local absl::Mutex *BlockingUnback::mu_ = nullptr; +absl::BlockingCounter *BlockingUnback::counter = nullptr; + +class FillerTest : public testing::Test { + protected: + // Our templating approach lets us directly override certain functions + // and have mocks without virtualization. It's a bit funky but works. + typedef PageTracker FakeTracker; + + // We have backing of one word per (normal-sized) page for our "hugepages". + std::vector backing_; + // This is space efficient enough that we won't bother recycling pages. + HugePage GetBacking() { + intptr_t i = backing_.size(); + backing_.resize(i + kPagesPerHugePage); + intptr_t addr = i << kPageShift; + CHECK_CONDITION(addr % kHugePageSize == 0); + return HugePageContaining(reinterpret_cast(addr)); + } + + size_t *GetFakePage(PageID p) { return &backing_[p]; } + + void MarkRange(PageID p, Length n, size_t mark) { + for (Length i = 0; i < n; ++i) { + *GetFakePage(p + i) = mark; + } + } + + void CheckRange(PageID p, Length n, size_t mark) { + for (Length i = 0; i < n; ++i) { + EXPECT_EQ(mark, *GetFakePage(p + i)); + } + } + + HugePageFiller filler_; + + FillerTest() { new (&filler_) HugePageFiller; } + + ~FillerTest() override { + EXPECT_EQ(NHugePages(0), filler_.size()); + } + + struct PAlloc { + FakeTracker *pt; + PageID p; + Length n; + size_t mark; + }; + + void Mark(const PAlloc &alloc) { MarkRange(alloc.p, alloc.n, alloc.mark); } + + void Check(const PAlloc &alloc) { CheckRange(alloc.p, alloc.n, alloc.mark); } + + size_t next_mark_{0}; + + HugeLength hp_contained_{NHugePages(0)}; + Length total_allocated_{0}; + + absl::InsecureBitGen gen_; + + void CheckStats() { + EXPECT_EQ(hp_contained_, filler_.size()); + auto stats = filler_.stats(); + const uint64_t freelist_bytes = stats.free_bytes + stats.unmapped_bytes; + const uint64_t used_bytes = stats.system_bytes - freelist_bytes; + EXPECT_EQ(total_allocated_ * kPageSize, used_bytes); + EXPECT_EQ((hp_contained_.in_pages() - total_allocated_) * kPageSize, + freelist_bytes); + } + PAlloc AllocateRaw(Length n, bool donated = false) { + PAlloc ret; + ret.n = n; + ret.mark = ++next_mark_; + bool success = false; + if (!donated) { // Donated means always create a new hugepage + absl::base_internal::SpinLockHolder l(&pageheap_lock); + success = filler_.TryGet(n, &ret.pt, &ret.p); + } + if (!success) { + ret.pt = + new FakeTracker(GetBacking(), absl::base_internal::CycleClock::Now()); + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + ret.p = ret.pt->Get(n); + } + filler_.Contribute(ret.pt, donated); + ++hp_contained_; + } + + total_allocated_ += n; + return ret; + } + + PAlloc Allocate(Length n, bool donated = false) { + CHECK_CONDITION(n <= kPagesPerHugePage); + PAlloc ret = AllocateRaw(n, donated); + ret.n = n; + Mark(ret); + CheckStats(); + return ret; 
+ } + + // Returns true iff the filler returned an empty hugepage. + bool DeleteRaw(const PAlloc &p) { + FakeTracker *pt; + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + pt = filler_.Put(p.pt, p.p, p.n); + } + total_allocated_ -= p.n; + if (pt != nullptr) { + EXPECT_EQ(kPagesPerHugePage, pt->longest_free_range()); + EXPECT_TRUE(pt->empty()); + --hp_contained_; + delete pt; + return true; + } + + return false; + } + + // Returns true iff the filler returned an empty hugepage + bool Delete(const PAlloc &p) { + Check(p); + bool r = DeleteRaw(p); + CheckStats(); + return r; + } + + Length ReleasePages() { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + return filler_.ReleasePages(); + } + + // Generates an "interesting" pattern of allocations that highlights all the + // various features of our stats. + std::vector GenerateInterestingAllocs(); +}; + +TEST_F(FillerTest, Density) { + absl::BitGen rng; + // Start with a really annoying setup: some hugepages half + // empty (randomly) + std::vector allocs; + std::vector doomed_allocs; + static const HugeLength kNumHugePages = NHugePages(64); + for (Length i = 0; i < kNumHugePages.in_pages(); ++i) { + ASSERT_EQ(i, filler_.pages_allocated()); + if (absl::Bernoulli(rng, 1.0 / 2)) { + allocs.push_back(Allocate(1)); + } else { + doomed_allocs.push_back(Allocate(1)); + } + } + for (auto d : doomed_allocs) { + Delete(d); + } + EXPECT_EQ(kNumHugePages, filler_.size()); + // We want a good chance of touching ~every allocation. + size_t n = allocs.size(); + // Now, randomly add and delete to the allocations. + // We should converge to full and empty pages. + for (int j = 0; j < 6; j++) { + absl::c_shuffle(allocs, rng); + + for (int i = 0; i < n; ++i) { + Delete(allocs[i]); + allocs[i] = Allocate(1); + ASSERT_EQ(n, filler_.pages_allocated()); + } + } + + EXPECT_GE(allocs.size() / kPagesPerHugePage + 1, filler_.size().raw_num()); + + // clean up, check for failures + for (auto a : allocs) { + Delete(a); + ASSERT_EQ(--n, filler_.pages_allocated()); + } +} + +TEST_F(FillerTest, Release) { + static const size_t kAlloc = kPagesPerHugePage / 2; + PAlloc p1 = Allocate(kAlloc - 1); + PAlloc p2 = Allocate(kAlloc + 1); + + PAlloc p3 = Allocate(kAlloc - 2); + PAlloc p4 = Allocate(kAlloc + 2); + // We have two hugepages, both full: nothing to release. + ASSERT_EQ(0, ReleasePages()); + Delete(p1); + Delete(p3); + // Now we should see the p1 hugepage - emptier - released. + ASSERT_EQ(kAlloc - 1, ReleasePages()); + EXPECT_EQ(kAlloc - 1, filler_.unmapped_pages()); + ASSERT_TRUE(p1.pt->released()); + + // We expect to reuse p1.pt. 
+ PAlloc p5 = Allocate(kAlloc - 1); + ASSERT_TRUE(p1.pt == p5.pt || p3.pt == p5.pt); + + Delete(p2); + Delete(p4); + Delete(p5); +} + +TEST_F(FillerTest, Fragmentation) { + absl::BitGen rng; + auto dist = EmpiricalDistribution(absl::GetFlag(FLAGS_frag_req_limit)); + + std::vector allocs; + Length total = 0; + while (total < absl::GetFlag(FLAGS_frag_size)) { + Length n = dist(rng); + total += n; + allocs.push_back(AllocateRaw(n)); + } + + double max_slack = 0.0; + const size_t kReps = absl::GetFlag(FLAGS_frag_iters); + for (size_t i = 0; i < kReps; ++i) { + auto stats = filler_.stats(); + double slack = static_cast(stats.free_bytes) / stats.system_bytes; + + max_slack = std::max(slack, max_slack); + if (i % (kReps / 40) == 0) { + printf("%zu events: %zu allocs totalling %zu slack %f\n", i, + allocs.size(), total, slack); + } + if (absl::Bernoulli(rng, 1.0 / 2)) { + size_t index = absl::Uniform(rng, 0, allocs.size()); + std::swap(allocs[index], allocs.back()); + DeleteRaw(allocs.back()); + total -= allocs.back().n; + allocs.pop_back(); + } else { + Length n = dist(rng); + allocs.push_back(AllocateRaw(n)); + total += n; + } + } + + EXPECT_LE(max_slack, 0.05); + + for (auto a : allocs) { + DeleteRaw(a); + } +} + +static double BytesToMiB(size_t bytes) { return bytes / (1024.0 * 1024.0); } + +using testing::AnyOf; +using testing::Eq; + +TEST_F(FillerTest, HugePageFrac) { + // I don't actually care which we get, both are + // reasonable choices, but don't report a NaN/complain + // about divide by 0s/ give some bogus number for empty. + EXPECT_THAT(filler_.hugepage_frac(), AnyOf(Eq(0), Eq(1))); + static const Length kQ = kPagesPerHugePage / 4; + // These are all on one page: + auto a1 = Allocate(kQ); + auto a2 = Allocate(kQ); + auto a3 = Allocate(kQ - 1); + auto a4 = Allocate(kQ + 1); + + // As are these: + auto a5 = Allocate(kPagesPerHugePage - kQ); + auto a6 = Allocate(kQ); + + EXPECT_EQ(1, filler_.hugepage_frac()); + // Free space doesn't affect it... + Delete(a1); + Delete(a6); + EXPECT_EQ(1, filler_.hugepage_frac()); + + // Releasing the hugepage does. + ASSERT_EQ(kQ, ReleasePages()); + EXPECT_EQ(0.5, filler_.hugepage_frac()); + + // Check our arithmetic in a couple scenarios. + + // 2 kQs on the release and 3 on the hugepage + Delete(a2); + EXPECT_EQ(0.6, filler_.hugepage_frac()); + + // just-over-1 kQ on the release and 3 on the hugepage + Delete(a3); + EXPECT_EQ((3 * kQ) / (4.0 * kQ + 1), filler_.hugepage_frac()); + + // All huge! + Delete(a4); + EXPECT_EQ(1, filler_.hugepage_frac()); + + Delete(a5); +} + +// Repeatedly grow from FLAG_bytes to FLAG_bytes * growth factor, then shrink +// back down by random deletion. Then release partial hugepages until +// pageheap is bounded by some fraction of usage. +// Measure the effective hugepage fraction at peak and baseline usage, +// and the blowup in VSS footprint. +// +// This test is a tool for analyzing parameters -- not intended as an actual +// unit test. 
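+// Since the name carries the DISABLED_ prefix, gtest skips it unless run
+// with --gtest_also_run_disabled_tests (or the prefix is removed); the
+// workload is shaped by the --bytes, --growth_factor and --release_until
+// flags defined at the top of this file.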
+TEST_F(FillerTest, DISABLED_ReleaseFrac) { + absl::BitGen rng; + const Length baseline = absl::GetFlag(FLAGS_bytes) / kPageSize; + const Length peak = baseline * absl::GetFlag(FLAGS_growth_factor); + const Length free_target = baseline * absl::GetFlag(FLAGS_release_until); + + std::vector allocs; + while (filler_.used_pages() < baseline) { + allocs.push_back(AllocateRaw(1)); + } + + while (true) { + while (filler_.used_pages() < peak) { + allocs.push_back(AllocateRaw(1)); + } + const double peak_frac = filler_.hugepage_frac(); + // VSS + const size_t footprint = filler_.size().in_bytes(); + + std::shuffle(allocs.begin(), allocs.end(), rng); + + size_t limit = allocs.size(); + while (filler_.used_pages() > baseline) { + --limit; + DeleteRaw(allocs[limit]); + } + allocs.resize(limit); + while (filler_.free_pages() > free_target) { + ReleasePages(); + } + const double baseline_frac = filler_.hugepage_frac(); + + printf("%.3f %.3f %6.1f MiB\n", peak_frac, baseline_frac, + BytesToMiB(footprint)); + } +} + +TEST_F(FillerTest, ReleaseAccounting) { + const Length N = kPagesPerHugePage; + auto big = Allocate(N - 2); + auto tiny1 = Allocate(1); + auto tiny2 = Allocate(1); + auto half1 = Allocate(N / 2); + auto half2 = Allocate(N / 2); + + Delete(half1); + Delete(big); + + ASSERT_EQ(NHugePages(2), filler_.size()); + + // We should pick the [empty big][full tiny] hugepage here. + EXPECT_EQ(N - 2, ReleasePages()); + EXPECT_EQ(N - 2, filler_.unmapped_pages()); + // This should trigger a release too: + Delete(tiny1); + EXPECT_EQ(N - 1, filler_.unmapped_pages()); + + // As should this, but this will drop the whole hugepage + Delete(tiny2); + EXPECT_EQ(0, filler_.unmapped_pages()); + EXPECT_EQ(NHugePages(1), filler_.size()); + + // This shouldn't trigger any release: we just claim credit for the + // releases we did automatically on tiny1 and tiny2. + EXPECT_EQ(2, ReleasePages()); + EXPECT_EQ(0, filler_.unmapped_pages()); + EXPECT_EQ(NHugePages(1), filler_.size()); + + // Now we pick the half/half hugepage + EXPECT_EQ(N / 2, ReleasePages()); + EXPECT_EQ(N / 2, filler_.unmapped_pages()); + + Delete(half2); + EXPECT_EQ(NHugePages(0), filler_.size()); + EXPECT_EQ(0, filler_.unmapped_pages()); +} + +TEST_F(FillerTest, ReleaseWithReuse) { + const Length N = kPagesPerHugePage; + auto half = Allocate(N / 2); + auto tiny1 = Allocate(N / 4); + auto tiny2 = Allocate(N / 4); + + Delete(half); + + ASSERT_EQ(NHugePages(1), filler_.size()); + + // We should be able to release the pages from half1. + EXPECT_EQ(N / 2, ReleasePages()); + EXPECT_EQ(N / 2, filler_.unmapped_pages()); + + // Release tiny1, release more. + Delete(tiny1); + + EXPECT_EQ(N / 4, ReleasePages()); + EXPECT_EQ(3 * N / 4, filler_.unmapped_pages()); + + // Repopulate, confirm we can't release anything and unmapped pages goes to 0. + tiny1 = Allocate(N / 4); + EXPECT_EQ(0, ReleasePages()); + EXPECT_EQ(N / 2, filler_.unmapped_pages()); + + // Continue repopulating. + half = Allocate(N / 2); + EXPECT_EQ(0, ReleasePages()); + EXPECT_EQ(0, filler_.unmapped_pages()); + EXPECT_EQ(NHugePages(1), filler_.size()); + + // Release everything and cleanup. + Delete(half); + Delete(tiny1); + Delete(tiny2); + EXPECT_EQ(NHugePages(0), filler_.size()); + EXPECT_EQ(0, filler_.unmapped_pages()); +} + +TEST_F(FillerTest, AvoidArbitraryQuarantineVMGrowth) { + const Length N = kPagesPerHugePage; + // Guarantee we have a ton of released pages go empty. 
+ for (int i = 0; i < 10 * 1000; ++i) { + auto half1 = Allocate(N / 2); + auto half2 = Allocate(N / 2); + Delete(half1); + ASSERT_EQ(N / 2, ReleasePages()); + Delete(half2); + } + + auto s = filler_.stats(); + EXPECT_GE(1024 * 1024 * 1024, s.system_bytes); +} + +TEST_F(FillerTest, StronglyPreferNonDonated) { + // We donate several huge pages of varying fullnesses. Then we make several + // allocations that would be perfect fits for the donated hugepages, *after* + // making one allocation that won't fit, to ensure that a huge page is + // contributed normally. Finally, we verify that we can still get the + // donated huge pages back. (I.e. they weren't used.) + std::vector donated; + ASSERT_GE(kPagesPerHugePage, 10); + for (int i = 1; i <= 3; ++i) { + donated.push_back(Allocate(kPagesPerHugePage - i, /*donated=*/true)); + } + + std::vector regular; + for (int i = 4; i >= 1; --i) { + regular.push_back(Allocate(i)); + } + + for (const PAlloc &alloc : donated) { + // All the donated huge pages should be freeable. + EXPECT_TRUE(Delete(alloc)); + } + + for (const PAlloc &alloc : regular) { + Delete(alloc); + } +} + +std::vector FillerTest::GenerateInterestingAllocs() { + PAlloc a = Allocate(1); + EXPECT_EQ(ReleasePages(), kPagesPerHugePage - 1); + Delete(a); + // Get the report on the released page + EXPECT_EQ(ReleasePages(), 1); + + // Use a maximally-suboptimal pattern to get lots of hugepages into the + // filler. + std::vector result; + static_assert(kPagesPerHugePage > 7, "Not enough pages per hugepage!"); + for (int i = 0; i < 7; ++i) { + result.push_back(Allocate(kPagesPerHugePage - i - 1)); + } + + // Get two released hugepages. + EXPECT_EQ(ReleasePages(), 7); + EXPECT_EQ(ReleasePages(), 6); + + // Fill some of the remaining pages with small allocations. + for (int i = 0; i < 9; ++i) { + result.push_back(Allocate(1)); + } + + // Finally, donate one hugepage. + result.push_back(Allocate(1, /*donated=*/true)); + return result; +} + +// Test the output of Print(). This is something of a change-detector test, +// but that's not all bad in this case. +TEST_F(FillerTest, Print) { + if (kPagesPerHugePage != 256) { + // The output is hardcoded on this assumption, and dynamically calculating + // it would be way too much of a pain. + return; + } + auto allocs = GenerateInterestingAllocs(); + + std::string buffer(1024 * 1024, '\0'); + { + TCMalloc_Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(&printer, /*everything=*/true); + } + // Find the \0 that got added. + buffer.resize(strlen(buffer.c_str())); + + EXPECT_EQ(buffer, + R"(HugePageFiller: densely pack small requests into hugepages +HugePageFiller: 8 total, 3 full, 3 partial, 2 released, 0 quarantined +HugePageFiller: 261 pages free in 8 hugepages, 0.1274 free +HugePageFiller: among non-fulls, 0.3398 free +HugePageFiller: 2 hugepages partially released, 0.0254 released +HugePageFiller: 0.7187 of used pages hugepageable + +HugePageFiller: fullness histograms + +HugePageFiller: # of regular hps with a<= # of free pages +#include + +#include +#include +#include +#include + +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +// A single aligned huge page. 
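+// Represented only by its number pn, i.e. its start address divided by
+// kHugePageSize; HugePageContaining() below maps raw addresses and PageIDs
+// back to the enclosing HugePage.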
+struct HugePage { + void *start_addr() const { + return reinterpret_cast(pn << kHugePageShift); + } + + PageID first_page() const { return pn << (kHugePageShift - kPageShift); } + + size_t index() const { return pn; } + + uintptr_t pn; +}; + +struct HugeLength { + size_t n; + + constexpr HugeLength() : n(0) {} + explicit HugeLength(double x) : n(ceil(x)) { ASSERT(x >= 0); } + constexpr size_t raw_num() const { return n; } + constexpr size_t in_bytes() const { return n * kHugePageSize; } + constexpr size_t in_mib() const { + static_assert(kHugePageSize >= 1024 * 1024, "tiny hugepages?"); + return n * (kHugePageSize / 1024 / 1024); + } + constexpr Length in_pages() const { return n * kPagesPerHugePage; } + + // It is possible to have a HugeLength that corresponds to more + // bytes than can be addressed (i.e. > size_t.) Check for that. + bool overflows() const; + + private: + explicit constexpr HugeLength(size_t x) : n(x) {} + friend constexpr HugeLength NHugePages(size_t n); +}; + +// Literal constructors (made explicit to avoid accidental uses when +// another unit was meant.) +inline constexpr HugeLength NHugePages(size_t n) { return HugeLength(n); } + +inline constexpr HugeLength HLFromBytes(size_t bytes) { + return NHugePages(bytes / kHugePageSize); +} + +// Rounds *up* to the nearest hugepage. +inline constexpr HugeLength HLFromPages(Length pages) { + return NHugePages((pages + kPagesPerHugePage - 1) / kPagesPerHugePage); +} + +inline HugeLength &operator++(HugeLength &len) { // NOLINT(runtime/references) + len.n++; + return len; +} + +inline HugePage &operator++(HugePage &p) { // NOLINT(runtime/references) + p.pn++; + return p; +} + +inline HugeLength &operator--(HugeLength &len) { // NOLINT(runtime/references) + ASSERT(len.n >= 1); + len.n--; + return len; +} + +inline constexpr bool operator<(HugeLength lhs, HugeLength rhs) { + return lhs.n < rhs.n; +} + +inline constexpr bool operator>(HugeLength lhs, HugeLength rhs) { + return lhs.n > rhs.n; +} + +inline constexpr bool operator<=(HugeLength lhs, HugeLength rhs) { + return lhs.n <= rhs.n; +} + +inline constexpr bool operator<(HugePage lhs, HugePage rhs) { + return lhs.pn < rhs.pn; +} + +inline constexpr bool operator>(HugePage lhs, HugePage rhs) { + return lhs.pn > rhs.pn; +} + +inline constexpr bool operator>=(HugeLength lhs, HugeLength rhs) { + return lhs.n >= rhs.n; +} + +inline constexpr bool operator<=(HugePage lhs, HugePage rhs) { + return lhs.pn <= rhs.pn; +} + +inline constexpr bool operator>=(HugePage lhs, HugePage rhs) { + return lhs.pn >= rhs.pn; +} + +inline constexpr bool operator==(HugePage lhs, HugePage rhs) { + return lhs.pn == rhs.pn; +} + +inline constexpr bool operator!=(HugePage lhs, HugePage rhs) { + return !(lhs == rhs); +} + +inline constexpr bool operator==(HugeLength lhs, HugeLength rhs) { + return lhs.n == rhs.n; +} + +inline constexpr bool operator!=(HugeLength lhs, HugeLength rhs) { + return lhs.n != rhs.n; +} + +inline constexpr size_t operator/(HugeLength lhs, HugeLength rhs) { + return lhs.n / rhs.n; +} + +inline constexpr HugeLength operator*(HugeLength lhs, size_t rhs) { + return NHugePages(lhs.n * rhs); +} + +inline constexpr HugeLength operator/(HugeLength lhs, size_t rhs) { + return NHugePages(lhs.n / rhs); +} + +inline HugeLength &operator*=(HugeLength &lhs, size_t rhs) { + lhs.n *= rhs; + return lhs; +} + +inline constexpr HugeLength operator%(HugeLength lhs, HugeLength rhs) { + return NHugePages(lhs.n % rhs.n); +} + +inline constexpr HugePage operator+(HugePage lhs, HugeLength rhs) { + return 
HugePage{lhs.pn + rhs.n}; +} + +inline constexpr HugePage operator+(HugeLength lhs, HugePage rhs) { + return rhs + lhs; +} + +inline constexpr HugePage operator-(HugePage lhs, HugeLength rhs) { + return ASSERT(lhs.pn >= rhs.n), HugePage{lhs.pn - rhs.n}; +} + +inline constexpr HugeLength operator-(HugePage lhs, HugePage rhs) { + return ASSERT(lhs.pn >= rhs.pn), NHugePages(lhs.pn - rhs.pn); +} + +inline HugePage &operator+=(HugePage &lhs, HugeLength rhs) { + lhs.pn += rhs.n; + return lhs; +} + +inline constexpr HugeLength operator+(HugeLength lhs, HugeLength rhs) { + return NHugePages(lhs.n + rhs.n); +} + +inline HugeLength &operator+=(HugeLength &lhs, HugeLength rhs) { + lhs.n += rhs.n; + return lhs; +} + +inline constexpr HugeLength operator-(HugeLength lhs, HugeLength rhs) { + return ASSERT(lhs.n >= rhs.n), NHugePages(lhs.n - rhs.n); +} + +inline HugeLength &operator-=(HugeLength &lhs, HugeLength rhs) { + ASSERT(lhs.n >= rhs.n); + lhs.n -= rhs.n; + return lhs; +} + +inline bool HugeLength::overflows() const { + return *this > HLFromBytes(std::numeric_limits::max()); +} + +inline void PrintTo(const HugeLength &n, ::std::ostream *os) { + *os << n.raw_num() << "hps"; +} + +inline HugePage HugePageContaining(PageID p) { + return {p >> (kHugePageShift - kPageShift)}; +} + +inline HugePage HugePageContaining(void *p) { + return HugePageContaining(reinterpret_cast(p) >> kPageShift); +} + +// A set of contiguous huge pages. +struct HugeRange { + void *start_addr() const { return first.start_addr(); } + void *end_addr() const { return (first + n).start_addr(); } + size_t byte_len() const { + return static_cast(end_addr()) - static_cast(start_addr()); + } + + // Assume any range starting at 0 is bogus. + bool valid() const { return first.start_addr() != nullptr; } + + constexpr HugePage start() const { return first; } + + constexpr HugeLength len() const { return n; } + + HugePage operator[](HugeLength i) const { return first + i; } + + template + friend H AbslHashValue(H h, const HugeRange &r) { + return H::combine(std::move(h), r.start().start_addr(), r.len().raw_num()); + } + + bool contains(PageID p) const { return contains(HugePageContaining(p)); } + bool contains(HugePage p) const { return p >= first && (p - first) < n; } + bool contains(HugeRange r) const { + return r.first >= first && (r.first + r.n) <= (first + n); + } + + bool intersects(HugeRange r) const { + return r.contains(start()) || contains(r.start()); + } + + // True iff r is our immediate successor (i.e. this + r is one large + // (non-overlapping) range.) + bool precedes(HugeRange r) const { return end_addr() == r.start_addr(); } + + static HugeRange Nil() { + return {HugePageContaining(nullptr), NHugePages(0)}; + } + + static HugeRange Make(HugePage p, HugeLength n) { return {p, n}; } + + HugePage first; + HugeLength n; +}; + +inline constexpr bool operator==(HugeRange lhs, HugeRange rhs) { + return lhs.start() == rhs.start() && lhs.len() == rhs.len(); +} + +// REQUIRES: a and b are disjoint but adjacent (in that order) + +inline HugeRange Join(HugeRange a, HugeRange b) { + CHECK_CONDITION(a.precedes(b)); + return {a.start(), a.len() + b.len()}; +} + +// REQUIRES r.len() >= n +// Splits r into two ranges, one of length n. The other is either the rest +// of the space (if any) or Nil. 
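+//
+// For example (illustrative):
+//   auto parts = Split(r, NHugePages(4));
+//   // For a 10-hugepage r, parts.first covers the first 4 hugepages and
+//   // parts.second the remaining 6; Split(r, r.len()) instead returns
+//   // {r, HugeRange::Nil()}.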
+inline std::pair Split(HugeRange r, HugeLength n) { + ASSERT(r.len() >= n); + if (r.len() > n) { + return {HugeRange::Make(r.start(), n), + HugeRange::Make(r.start() + n, r.len() - n)}; + } else { + return {r, HugeRange::Nil()}; + } +} + +} // namespace tcmalloc +#endif // TCMALLOC_HUGE_PAGES_H_ diff --git a/tcmalloc/huge_region.h b/tcmalloc/huge_region.h new file mode 100644 index 000000000..8b5028c87 --- /dev/null +++ b/tcmalloc/huge_region.h @@ -0,0 +1,553 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_REGION_H_ +#define TCMALLOC_HUGE_REGION_H_ +#include +#include + +#include + +#include "absl/base/internal/cycleclock.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_page_filler.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +// Track allocations from a fixed-size multiple huge page region. +// Similar to PageTracker but a few important differences: +// - crosses multiple hugepages +// - backs region on demand +// - supports breaking up the partially-allocated region for use elsewhere +// +// This is intended to help with fast allocation of regions too large +// for HugePageFiller, but too small to round to a full hugepage; both +// lengths that do fit in a hugepage, but often wouldn't fit in +// available gaps (1.75 MiB), and lengths that don't fit, but would +// introduce unacceptable fragmentation (2.1 MiB). +// +template +class HugeRegion : public TList>::Elem { + public: + // We could template this if there was any need. + static constexpr HugeLength size() { return HLFromBytes(1024 * 1024 * 1024); } + static constexpr size_t kNumHugePages = size().raw_num(); + + // REQUIRES: r.len() == size(); r unbacked. + explicit HugeRegion(HugeRange r); + + // If available, return a range of n free pages, setting *from_released = + // true iff the returned range is currently unbacked. + // Returns false if no range available. + bool MaybeGet(Length n, PageID *p, bool *from_released); + + // Return [p, p + n) for new allocations. + // If release=true, release any hugepages made empty as a result. + // REQUIRES: [p, p + n) was the result of a previous MaybeGet. + void Put(PageID p, Length n, bool release); + + // Release any hugepages that are unused but backed. + HugeLength Release(); + + // Is p located in this region? 
+ bool contains(PageID p) { return location_.contains(p); } + + // Stats + Length used_pages() const { return tracker_.used(); } + Length free_pages() const { + return size().in_pages() - unmapped_pages() - used_pages(); + } + Length unmapped_pages() const { return (size() - nbacked_).in_pages(); } + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + HugeLength backed() const; + + void Print(TCMalloc_Printer *out) const; + void PrintInPbtxt(PbtxtRegion *detail) const; + + BackingStats stats() const; + + // We don't define this as operator< because it's a rather specialized order. + bool BetterToAllocThan(const HugeRegion *rhs) const { + return longest_free() < rhs->longest_free(); + } + + void prepend_it(HugeRegion *other) { this->prepend(other); } + + void append_it(HugeRegion *other) { this->append(other); } + + private: + RangeTracker tracker_; + + HugeRange location_; + + static int64_t AverageWhens(Length a, int64_t a_when, Length b, + int64_t b_when) { + const double aw = static_cast(a) * a_when; + const double bw = static_cast(b) * b_when; + return static_cast((aw + bw) / (a + b)); + } + + Length longest_free() const { return tracker_.longest_free(); } + + // Adjust counts of allocs-per-hugepage for [p, p + n) being added/removed. + + // *from_released is set to true iff [p, p + n) is currently unbacked + void Inc(PageID p, Length n, bool *from_released); + // If release is true, unback any hugepage that becomes empty. + void Dec(PageID p, Length n, bool release); + + void UnbackHugepages(bool should[kNumHugePages]); + + // How many pages are used in each hugepage? + size_t pages_used_[kNumHugePages]; + // Is this hugepage backed? + bool backed_[kNumHugePages]; + HugeLength nbacked_; + int64_t whens_[kNumHugePages]; + HugeLength total_unbacked_{NHugePages(0)}; +}; + +// Manage a set of regions from which we allocate. +// Strategy: Allocate from the most fragmented region that fits. +template +class HugeRegionSet { + public: + HugeRegionSet() : n_(0) { list_.Init(); } + + // If available, return a range of n free pages, setting *from_released = + // true iff the returned range is currently unbacked. + // Returns false if no range available. + bool MaybeGet(Length n, PageID *page, bool *from_released); + + // Return an allocation to a region (if one matches!) + bool MaybePut(PageID p, Length n); + + // Add region to the set. + void Contribute(Region *region); + + // Unback any totally unused hugepages; return the number of pages + // we managed to release. + HugeLength Release(); + + void Print(TCMalloc_Printer *out) const; + void PrintInPbtxt(PbtxtRegion *hpaa) const; + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + BackingStats stats() const; + + private: + void Fix(Region *r) { + // We've changed r's fragmentation--move it through the list to the + // correct home (if needed). + Rise(r); + Fall(r); + } + + // Check if r has to move forward in the list. + void Rise(Region *r) { + auto prev = list_.at(r); + --prev; + if (prev == list_.end()) return; // we're at the front + if (!r->BetterToAllocThan(*prev)) return; // we're far enough forward + list_.remove(r); + for (auto iter = prev; iter != list_.end(); --iter) { + if (!r->BetterToAllocThan(*iter)) { + iter->append_it(r); + return; + } + } + list_.prepend(r); + } + + // Check if r has to move backward in the list. 
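+  // (Together, Rise() and Fall() keep the list sorted by longest_free(), so
+  // MaybeGet() below always tries the most fragmented region that can still
+  // satisfy a request first.)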
+ void Fall(Region *r) { + auto next = list_.at(r); + ++next; + if (next == list_.end()) return; // we're at the back + if (!next->BetterToAllocThan(r)) return; // we're far enough back + list_.remove(r); + for (auto iter = next; iter != list_.end(); ++iter) { + if (!iter->BetterToAllocThan(r)) { + iter->prepend_it(r); + return; + } + } + list_.append(r); + } + + // Add r in its sorted place. + void AddToList(Region *r) { + for (Region *curr : list_) { + if (r->BetterToAllocThan(curr)) { + curr->prepend_it(r); + return; + } + } + + // Note this handles the empty-list case + list_.append(r); + } + + size_t n_; + // Sorted by longest_free increasing. + TList list_; +}; + +// REQUIRES: r.len() == size(); r unbacked. +template +inline HugeRegion::HugeRegion(HugeRange r) + : tracker_{}, + location_(r), + pages_used_{}, + backed_{}, + nbacked_(NHugePages(0)) { + int64_t now = absl::base_internal::CycleClock::Now(); + for (int i = 0; i < kNumHugePages; ++i) { + whens_[i] = now; + // These are already 0 but for clarity... + pages_used_[i] = 0; + backed_[i] = false; + } +} + +template +inline bool HugeRegion::MaybeGet(Length n, PageID *p, + bool *from_released) { + if (n > longest_free()) return false; + size_t index = tracker_.FindAndMark(n); + + PageID page = location_.start().first_page() + index; + *p = page; + + // the last hugepage we touch + Inc(page, n, from_released); + return true; +} + +// If release=true, release any hugepages made empty as a result. +template +inline void HugeRegion::Put(PageID p, Length n, bool release) { + size_t index = p - location_.start().first_page(); + tracker_.Unmark(index, n); + + Dec(p, n, release); +} + +// Release any hugepages that are unused but backed. +template +inline HugeLength HugeRegion::Release() { + HugeLength r = NHugePages(0); + bool should_unback_[kNumHugePages] = {}; + for (size_t i = 0; i < kNumHugePages; ++i) { + if (backed_[i] && pages_used_[i] == 0) { + should_unback_[i] = true; + ++r; + } + } + UnbackHugepages(should_unback_); + return r; +} + +template +inline void HugeRegion::AddSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) const { + size_t index = 0, n; + size_t f = 0, u = 0; + // This is complicated a bit by the backed/unbacked status of pages. + while (tracker_.NextFreeRange(index, &index, &n)) { + // [index, index + n) is an *unused* range. As it may cross + // hugepages, we may need to truncate it so it is either a + // *free* or a *released* range, and compute a reasonable value + // for its "when". 
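+    // For example, if the range starts on a backed hugepage and runs into an
+    // unbacked one, the loop below keeps only the leading backed portion;
+    // the rest is picked up by a later iteration of the outer loop, since we
+    // only advance index by the truncated length.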
+ PageID p = location_.start().first_page() + index; + const HugePage hp = HugePageContaining(p); + size_t i = (hp - location_.start()) / NHugePages(1); + const bool backed = backed_[i]; + Length truncated = 0; + int64_t when = 0; + while (n > 0 && backed_[i] == backed) { + const PageID lim = (location_.start() + NHugePages(i + 1)).first_page(); + Length here = std::min(n, lim - p); + when = AverageWhens(truncated, when, here, whens_[i]); + truncated += here; + n -= here; + p += here; + i++; + ASSERT(i < kNumHugePages || n == 0); + } + n = truncated; + const bool released = !backed; + if (released) { + u += n; + } else { + f += n; + } + if (n < kMaxPages) { + if (small != nullptr) { + if (released) { + small->returned_length[n]++; + } else { + small->normal_length[n]++; + } + } + } else { + if (large != nullptr) { + large->spans++; + if (released) { + large->returned_pages += n; + } else { + large->normal_pages += n; + } + } + } + + if (ages != nullptr) { + ages->RecordRange(n, released, when); + } + index += n; + } + CHECK_CONDITION(f == free_pages()); + CHECK_CONDITION(u == unmapped_pages()); +} + +template +inline HugeLength HugeRegion::backed() const { + HugeLength b; + for (int i = 0; i < kNumHugePages; ++i) { + if (backed_[i]) { + ++b; + } + } + + return b; +} + +template +inline void HugeRegion::Print(TCMalloc_Printer *out) const { + const size_t kib_used = (used_pages() * kPageSize) / 1024; + const size_t kib_free = (free_pages() * kPageSize) / 1024; + const size_t kib_longest_free = (longest_free() * kPageSize) / 1024; + const HugeLength unbacked = size() - backed(); + const size_t mib_unbacked = unbacked.in_bytes() / 1024 / 1024; + out->printf( + "HugeRegion: %zu KiB used, %zu KiB free, " + "%zu KiB contiguous space, %zu MiB unbacked, " + "%zu MiB unbacked lifetime\n", + kib_used, kib_free, kib_longest_free, mib_unbacked, + total_unbacked_.in_bytes() / 1024 / 1024); +} + +template +inline void HugeRegion::PrintInPbtxt(PbtxtRegion *detail) const { + detail->PrintI64("used_bytes", used_pages() * kPageSize); + detail->PrintI64("free_bytes", free_pages() * kPageSize); + detail->PrintI64("longest_free_range_bytes", longest_free() * kPageSize); + const HugeLength unbacked = size() - backed(); + detail->PrintI64("unbacked_bytes", unbacked.in_bytes()); + detail->PrintI64("total_unbacked_bytes", total_unbacked_.in_bytes()); +} + +template +inline BackingStats HugeRegion::stats() const { + BackingStats s; + s.system_bytes = location_.len().in_bytes(); + s.free_bytes = free_pages() * kPageSize; + s.unmapped_bytes = unmapped_pages() * kPageSize; + return s; +} + +template +inline void HugeRegion::Inc(PageID p, Length n, bool *from_released) { + bool should_back = false; + const int64_t now = absl::base_internal::CycleClock::Now(); + while (n > 0) { + const HugePage hp = HugePageContaining(p); + const size_t i = (hp - location_.start()) / NHugePages(1); + const PageID lim = (hp + NHugePages(1)).first_page(); + Length here = std::min(n, lim - p); + if (pages_used_[i] == 0 && !backed_[i]) { + backed_[i] = true; + should_back = true; + ++nbacked_; + whens_[i] = now; + } + pages_used_[i] += here; + ASSERT(pages_used_[i] <= kPagesPerHugePage); + p += here; + n -= here; + } + *from_released = should_back; +} + +template +inline void HugeRegion::Dec(PageID p, Length n, bool release) { + const int64_t now = absl::base_internal::CycleClock::Now(); + bool should_unback_[kNumHugePages] = {}; + while (n > 0) { + const HugePage hp = HugePageContaining(p); + const size_t i = (hp - location_.start()) / 
NHugePages(1); + const PageID lim = (hp + NHugePages(1)).first_page(); + Length here = std::min(n, lim - p); + ASSERT(here > 0); + ASSERT(pages_used_[i] >= here); + ASSERT(backed_[i]); + whens_[i] = + AverageWhens(here, now, kPagesPerHugePage - pages_used_[i], whens_[i]); + pages_used_[i] -= here; + if (pages_used_[i] == 0) { + should_unback_[i] = true; + } + p += here; + n -= here; + } + if (release) { + UnbackHugepages(should_unback_); + } +} + +template +inline void HugeRegion::UnbackHugepages(bool should[kNumHugePages]) { + const int64_t now = absl::base_internal::CycleClock::Now(); + size_t i = 0; + while (i < kNumHugePages) { + if (!should[i]) { + i++; + continue; + } + size_t j = i; + while (j < kNumHugePages && should[j]) { + backed_[j] = false; + whens_[j] = now; + j++; + } + + HugeLength hl = NHugePages(j - i); + nbacked_ -= hl; + HugePage p = location_.start() + NHugePages(i); + Unback(p.start_addr(), hl.in_bytes()); + total_unbacked_ += hl; + i = j; + } +} + +// If available, return a range of n free pages, setting *from_released = +// true iff the returned range is currently unbacked. +// Returns false if no range available. +template +inline bool HugeRegionSet::MaybeGet(Length n, PageID *page, + bool *from_released) { + for (Region *region : list_) { + if (region->MaybeGet(n, page, from_released)) { + Fix(region); + return true; + } + } + return false; +} + +// Return an allocation to a region (if one matches!) +template +inline bool HugeRegionSet::MaybePut(PageID p, Length n) { + for (Region *region : list_) { + if (region->contains(p)) { + region->Put(p, n, true); + Fix(region); + return true; + } + } + + return false; +} + +// Add region to the set. +template +inline void HugeRegionSet::Contribute(Region *region) { + n_++; + AddToList(region); +} + +// Unback any totally unused hugepages; return the number of pages +// we managed to release. +template +inline HugeLength HugeRegionSet::Release() { + HugeLength hl = NHugePages(0); + for (Region *region : list_) { + hl += region->Release(); + } + + return hl; +} + +template +inline void HugeRegionSet::Print(TCMalloc_Printer *out) const { + out->printf("HugeRegionSet: 1 MiB+ allocations best-fit into %zu MiB slabs\n", + Region::size().in_bytes() / 1024 / 1024); + out->printf("HugeRegionSet: %zu total regions\n", n_); + Length total_free = 0; + HugeLength total_backed = NHugePages(0); + + for (Region *region : list_) { + region->Print(out); + total_free += region->free_pages(); + total_backed += region->backed(); + } + + out->printf("HugeRegionSet: %zu hugepages backed out of %zu total\n", + total_backed.raw_num(), Region::size().raw_num() * n_); + + const Length in_pages = total_backed.in_pages(); + out->printf("HugeRegionSet: %zu pages free in backed region, %.4f free\n", + total_free, + in_pages > 0 ? 
static_cast(total_free) / in_pages : 0.0); +} + +template +inline void HugeRegionSet::PrintInPbtxt(PbtxtRegion *hpaa) const { + hpaa->PrintI64("min_huge_region_alloc_size", 1024 * 1024); + hpaa->PrintI64("huge_region_size", Region::size().in_bytes()); + for (Region *region : list_) { + auto detail = hpaa->CreateSubRegion("huge_region_details"); + region->PrintInPbtxt(&detail); + } +} + +template +inline void HugeRegionSet::AddSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) const { + for (Region *region : list_) { + region->AddSpanStats(small, large, ages); + } +} + +template +inline BackingStats HugeRegionSet::stats() const { + BackingStats stats; + for (Region *region : list_) { + stats += region->stats(); + } + + return stats; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_HUGE_REGION_H_ diff --git a/tcmalloc/huge_region_test.cc b/tcmalloc/huge_region_test.cc new file mode 100644 index 000000000..baf8b08f5 --- /dev/null +++ b/tcmalloc/huge_region_test.cc @@ -0,0 +1,556 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_region.h" + +#include +#include + +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { +namespace { + +using testing::NiceMock; +using testing::StrictMock; + +class HugeRegionTest : public ::testing::Test { + protected: + HugeRegionTest() + : // an unlikely magic page + p_(HugePageContaining(reinterpret_cast(0x1faced200000))), + region_({p_, region_.size()}) { + // we usually don't care about backing calls, unless testing that + // specifically. 
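+    // (Presumably a NiceMock, so stray Unback calls are simply ignored;
+    // tests that do verify unbacking, such as Release and Reback below,
+    // re-create mock_ with a stricter mock before allocating.)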
+ mock_ = absl::make_unique>(); + } + + ~HugeRegionTest() override { mock_.reset(nullptr); } + + // This is wordy, but necessary for mocking: + class BackingInterface { + public: + virtual void Unback(void *p, size_t len) = 0; + virtual ~BackingInterface() {} + }; + + class MockBackingInterface : public BackingInterface { + public: + MOCK_METHOD2(Unback, void(void *p, size_t len)); + }; + + static std::unique_ptr mock_; + + static void MockUnback(void *p, size_t len) { mock_->Unback(p, len); } + + void CheckMock() { testing::Mock::VerifyAndClearExpectations(mock_.get()); } + + void ExpectUnback(HugeRange r) { + void *ptr = r.start_addr(); + size_t bytes = r.byte_len(); + EXPECT_CALL(*mock_, Unback(ptr, bytes)).Times(1); + } + + struct Alloc { + PageID p; + Length n; + size_t mark; + }; + + HugePage p_; + typedef HugeRegion Region; + Region region_; + size_t next_mark_{0}; + size_t marks_[Region::size().in_pages()]; + + void Mark(Alloc a) { + EXPECT_LE(p_.first_page(), a.p); + size_t index = a.p - p_.first_page(); + size_t end = index + a.n; + EXPECT_LE(end, region_.size().in_pages()); + for (; index < end; ++index) { + marks_[index] = a.mark; + } + } + + void Check(Alloc a) { + EXPECT_LE(p_.first_page(), a.p); + size_t index = a.p - p_.first_page(); + size_t end = index + a.n; + EXPECT_LE(end, region_.size().in_pages()); + for (; index < end; ++index) { + EXPECT_EQ(a.mark, marks_[index]); + } + } + + Alloc Allocate(Length n) { + bool from_released; + return Allocate(n, &from_released); + } + + Alloc Allocate(Length n, bool *from_released) { + Alloc ret; + CHECK_CONDITION(region_.MaybeGet(n, &ret.p, from_released)); + ret.n = n; + ret.mark = ++next_mark_; + Mark(ret); + return ret; + } + + void Delete(Alloc a) { + Check(a); + region_.Put(a.p, a.n, false); + } + + void DeleteUnback(Alloc a) { + Check(a); + region_.Put(a.p, a.n, true); + } +}; + +std::unique_ptr HugeRegionTest::mock_; + +TEST_F(HugeRegionTest, Basic) { + Length total = 0; + std::vector allocs; + for (Length n = 1; total + n < region_.size().in_pages(); ++n) { + allocs.push_back(Allocate(n)); + total += n; + EXPECT_EQ(total, region_.used_pages()); + } + + // Free every other alloc + std::vector lengths; + std::vector new_allocs; + for (Length j = 0; j < allocs.size(); ++j) { + if (j % 2 == 0) { + new_allocs.push_back(allocs[j]); + continue; + } + Length n = allocs[j].n; + Delete(allocs[j]); + total -= n; + EXPECT_EQ(total, region_.used_pages()); + lengths.push_back(n); + } + allocs.swap(new_allocs); + // and reallocate them in a random order: + std::shuffle(lengths.begin(), lengths.end(), absl::BitGen()); + // This should fit, since thge allocator is best-fit + // and we have unique gaps of each size. 
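+  // Illustrative case: with freed gaps of sizes {2, 4, 6}, a request for 4
+  // best-fits into the 4-page gap rather than splitting the 6-page one, so
+  // every request lands in the gap of exactly its own size no matter what
+  // order the lengths come back in.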
+ for (auto n : lengths) { + allocs.push_back(Allocate(n)); + total += n; + EXPECT_EQ(total, region_.used_pages()); + } + + for (auto a : allocs) { + Delete(a); + } +} + +TEST_F(HugeRegionTest, ReqsBacking) { + const Length n = kPagesPerHugePage; + std::vector allocs; + // should back the first page + bool from_released; + allocs.push_back(Allocate(n - 1, &from_released)); + EXPECT_TRUE(from_released); + // nothing + allocs.push_back(Allocate(1, &from_released)); + EXPECT_FALSE(from_released); + // second page + allocs.push_back(Allocate(1, &from_released)); + EXPECT_TRUE(from_released); + // third, fourth, fifth + allocs.push_back(Allocate(3 * n, &from_released)); + EXPECT_TRUE(from_released); + + for (auto a : allocs) { + Delete(a); + } +} + +TEST_F(HugeRegionTest, Release) { + mock_ = absl::make_unique>(); + const Length n = kPagesPerHugePage; + bool from_released; + auto a = Allocate(n * 4 - 1, &from_released); + EXPECT_TRUE(from_released); + + auto b = Allocate(n * 3, &from_released); + EXPECT_TRUE(from_released); + + auto c = Allocate(n * 5 + 1, &from_released); + EXPECT_TRUE(from_released); + + auto d = Allocate(n * 2, &from_released); + EXPECT_TRUE(from_released); + + auto e = Allocate(n / 2, &from_released); + EXPECT_TRUE(from_released); + auto f = Allocate(n / 2, &from_released); + EXPECT_FALSE(from_released); + + // Don't unback the first or last hugepage this touches -- since they + // overlap with others. + Delete(b); + ExpectUnback({p_ + NHugePages(4), NHugePages(2)}); + EXPECT_EQ(NHugePages(2), region_.Release()); + CheckMock(); + + // Now we're on exact boundaries so we should unback the whole range. + Delete(d); + ExpectUnback({p_ + NHugePages(12), NHugePages(2)}); + EXPECT_EQ(NHugePages(2), region_.Release()); + CheckMock(); + + Delete(a); + ExpectUnback({p_ + NHugePages(0), NHugePages(4)}); + EXPECT_EQ(NHugePages(4), region_.Release()); + CheckMock(); + + // Should work just as well with aggressive Put(): + ExpectUnback({p_ + NHugePages(6), NHugePages(6)}); + DeleteUnback(c); + CheckMock(); + + // And this _shouldn't_ do anything (page still in use) + DeleteUnback(e); + // But this should: + ExpectUnback({p_ + NHugePages(14), NHugePages(1)}); + DeleteUnback(f); + CheckMock(); +} + +TEST_F(HugeRegionTest, Reback) { + mock_ = absl::make_unique>(); + const Length n = kPagesPerHugePage / 4; + bool from_released; + // Even in back/unback cycles we should still call the functions + // on every transition. 
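+  // Each iteration allocates four n-page chunks (one full hugepage): only
+  // the first allocation should report from_released, and only the fourth
+  // deallocation should trigger an Unback of that hugepage, in every round.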
+ for (int i = 0; i < 20; ++i) { + std::vector allocs; + allocs.push_back(Allocate(n, &from_released)); + EXPECT_TRUE(from_released); + allocs.push_back(Allocate(n, &from_released)); + EXPECT_FALSE(from_released); + allocs.push_back(Allocate(n, &from_released)); + EXPECT_FALSE(from_released); + allocs.push_back(Allocate(n, &from_released)); + EXPECT_FALSE(from_released); + + std::shuffle(allocs.begin(), allocs.end(), absl::BitGen()); + DeleteUnback(allocs[0]); + DeleteUnback(allocs[1]); + DeleteUnback(allocs[2]); + + ExpectUnback({p_, NHugePages(1)}); + DeleteUnback(allocs[3]); + CheckMock(); + } +} + +TEST_F(HugeRegionTest, Stats) { + const Length kLen = region_.size().in_pages(); + const size_t kBytes = kLen * kPageSize; + struct Helper { + static void Stat(const Region ®ion, std::vector *small_backed, + std::vector *small_unbacked, LargeSpanStats *large, + BackingStats *stats, double *avg_age_backed, + double *avg_age_unbacked) { + SmallSpanStats small; + memset(&small, 0, sizeof(small)); + memset(large, 0, sizeof(*large)); + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + region.AddSpanStats(&small, large, &ages); + small_backed->clear(); + small_unbacked->clear(); + for (int i = 0; i < kMaxPages; ++i) { + for (int j = 0; j < small.normal_length[i]; ++j) { + small_backed->push_back(i); + } + + for (int j = 0; j < small.returned_length[i]; ++j) { + small_unbacked->push_back(i); + } + } + + *stats = region.stats(); + + *avg_age_backed = ages.GetTotalHistogram(false)->avg_age(); + *avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age(); + } + }; + + LargeSpanStats large; + std::vector small_backed, small_unbacked; + BackingStats stats; + double avg_age_backed, avg_age_unbacked; + + absl::SleepFor(absl::Milliseconds(10)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(0, large.normal_pages); + EXPECT_EQ(kLen, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ(0, stats.free_bytes); + EXPECT_EQ(kBytes, stats.unmapped_bytes); + EXPECT_LE(0.01, avg_age_unbacked); + EXPECT_EQ(0, avg_age_backed); + + // We don't, in production, use small allocations from the region, but + // the API supports it, so test it here. 
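+  // The six allocations below total 9 pages (1 + 1 + 2 + 1 + 3 + 1), all in
+  // the first hugepage, leaving kPagesPerHugePage - 9 pages of "slack" as
+  // the remaining backed free range.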
+ Alloc a = Allocate(1); + Allocate(1); + Alloc b = Allocate(2); + Alloc barrier = Allocate(1); + Alloc c = Allocate(3); + Allocate(1); + const Length slack = kPagesPerHugePage - 9; + + absl::SleepFor(absl::Milliseconds(20)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ(slack * kPageSize, stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE(0.02, avg_age_backed); + EXPECT_LE(0.03, avg_age_unbacked); + + Delete(a); + absl::SleepFor(absl::Milliseconds(30)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(1)); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + 1) * kPageSize, stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE((slack * 0.05 + 1 * 0.03) / (slack + 1), avg_age_backed); + EXPECT_LE(0.06, avg_age_unbacked); + + Delete(b); + absl::SleepFor(absl::Milliseconds(40)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(1, 2)); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + 3) * kPageSize, stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE((slack * 0.09 + 1 * 0.07 + 2 * 0.04) / (slack + 3), avg_age_backed); + EXPECT_LE(0.10, avg_age_unbacked); + + Delete(c); + absl::SleepFor(absl::Milliseconds(50)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(1, 2, 3)); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + 6) * kPageSize, stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE((slack * 0.14 + 1 * 0.12 + 2 * 0.09 + 3 * 0.05) / (slack + 6), + avg_age_backed); + EXPECT_LE(0.15, avg_age_unbacked); + + Delete(barrier); + absl::SleepFor(absl::Milliseconds(60)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(1, 6)); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + 7) * kPageSize, stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE( + (slack * 
0.20 + 1 * 0.18 + 2 * 0.15 + 3 * 0.11 + 1 * 0.06) / (slack + 7), + avg_age_backed); + EXPECT_LE(0.21, avg_age_unbacked); +} + +// Test that free regions are broken down properly when they cross +// page boundaries that change the backed/unbacked state. +TEST_F(HugeRegionTest, StatBreakdown) { + const Length n = kPagesPerHugePage; + Alloc a = Allocate(n / 4); + Alloc b = Allocate(n * 3 + n / 3); + Alloc c = Allocate((n - n / 3 - n / 4) + n * 5 + n / 5); + Alloc d = Allocate(n - (n / 5) - 1); + // This unbacks the middle 2 hugepages, but not the beginning or + // trailing region + DeleteUnback(b); + Delete(c); + SmallSpanStats small; + LargeSpanStats large; + memset(&small, 0, sizeof(small)); + memset(&large, 0, sizeof(large)); + region_.AddSpanStats(&small, &large, nullptr); + // Backed beginning of hugepage 0, unbacked range in middle of b, + // long backed range from c, unbacked tail of allocation. + EXPECT_EQ(4, large.spans); + // Tail end of A's page, B/C combined page + all of C. + EXPECT_EQ((n - n / 4) + n * 6 + (n / 5), large.normal_pages); + // The above fill up 10 total pages. + EXPECT_EQ(2 * n + (Region::size().raw_num() - 10) * n, large.returned_pages); + EXPECT_EQ(1, small.normal_length[1]); + + EXPECT_EQ( + 1 + large.normal_pages + large.returned_pages + region_.used_pages(), + Region::size().in_pages()); + Delete(a); + Delete(d); +} + +static void NilUnback(void *p, size_t bytes) {} + +class HugeRegionSetTest : public testing::Test { + protected: + // These regions are backed by "real" memory, but we don't touch it. + typedef HugeRegion Region; + + HugeRegionSetTest() { next_ = HugePageContaining(nullptr); } + + std::unique_ptr GetRegion() { + std::unique_ptr r(new Region({next_, Region::size()})); + next_ += Region::size(); + return r; + } + + HugeRegionSet set_; + HugePage next_; + + struct Alloc { + PageID p; + Length n; + }; +}; + +TEST_F(HugeRegionSetTest, Set) { + absl::BitGen rng; + PageID p; + Length kSize = kPagesPerHugePage + 1; + bool from_released; + ASSERT_FALSE(set_.MaybeGet(1, &p, &from_released)); + auto r1 = GetRegion(); + auto r2 = GetRegion(); + auto r3 = GetRegion(); + auto r4 = GetRegion(); + set_.Contribute(r1.get()); + set_.Contribute(r2.get()); + set_.Contribute(r3.get()); + set_.Contribute(r4.get()); + + std::vector allocs; + std::vector doomed; + + while (set_.MaybeGet(kSize, &p, &from_released)) { + allocs.push_back({p, kSize}); + } + + // Define a random set by shuffling, then move half of the allocations into + // doomed. + std::shuffle(allocs.begin(), allocs.end(), rng); + doomed.insert(doomed.begin(), allocs.begin() + allocs.size() / 2, + allocs.end()); + allocs.erase(allocs.begin() + allocs.size() / 2, allocs.end()); + + for (auto d : doomed) { + ASSERT_TRUE(set_.MaybePut(d.p, d.n)); + } + + for (size_t i = 0; i < 100 * 1000; ++i) { + const size_t N = allocs.size(); + size_t index = absl::Uniform(rng, 0, N); + std::swap(allocs[index], allocs[N - 1]); + auto a = allocs.back(); + ASSERT_TRUE(set_.MaybePut(a.p, a.n)); + allocs.pop_back(); + ASSERT_TRUE(set_.MaybeGet(kSize, &p, &from_released)); + allocs.push_back({p, kSize}); + } + + // Random traffic should have defragmented our allocations into full + // and empty regions, and released the empty ones. 
Annoyingly, we don't + // know which region is which, so we have to do a bit of silliness: + std::vector regions = {r1.get(), r2.get(), r3.get(), r4.get()}; + std::sort(regions.begin(), regions.end(), + [](const Region *a, const Region *b) -> bool { + return a->used_pages() > b->used_pages(); + }); + + for (int i = 0; i < regions.size(); i++) { + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, i, + regions[i]->used_pages(), regions[i]->free_pages(), + regions[i]->unmapped_pages()); + } + // Now first two should be "full" (ish) + EXPECT_LE(Region::size().in_pages() * 0.9, regions[0]->used_pages()); + EXPECT_LE(Region::size().in_pages() * 0.9, regions[1]->used_pages()); + // and last two "empty" (ish.) + EXPECT_LE(Region::size().in_pages() * 0.9, regions[2]->unmapped_pages()); + EXPECT_LE(Region::size().in_pages() * 0.9, regions[3]->unmapped_pages()); + + // Check the stats line up. + auto stats = set_.stats(); + auto raw = r1->stats(); + raw += r2->stats(); + raw += r3->stats(); + raw += r4->stats(); + EXPECT_EQ(raw.system_bytes, stats.system_bytes); + EXPECT_EQ(raw.unmapped_bytes, stats.unmapped_bytes); + EXPECT_EQ(raw.free_bytes, stats.free_bytes); + + // Print out the stats for inspection of formats. + std::vector buf(64 * 1024); + TCMalloc_Printer out(&buf[0], buf.size()); + set_.Print(&out); + printf("%s\n", &buf[0]); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/internal/BUILD b/tcmalloc/internal/BUILD new file mode 100644 index 000000000..3fcc49b45 --- /dev/null +++ b/tcmalloc/internal/BUILD @@ -0,0 +1,314 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Internal libraries used for the implementation and testing of TCMalloc. 
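+# Targets elsewhere under //tcmalloc depend on these as, e.g.,
+# "//tcmalloc/internal:logging"; visibility below is restricted to
+# //tcmalloc:__subpackages__.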
+ +load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS") + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "atomic_danger", + hdrs = ["atomic_danger.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_library( + name = "atomic_stats_counter", + hdrs = ["atomic_stats_counter.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_library( + name = "bits", + hdrs = ["bits.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":logging", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_test( + name = "bits_test", + srcs = ["bits_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":bits", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "config", + hdrs = ["config.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_library( + name = "declarations", + hdrs = ["declarations.h"], + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_library( + name = "linked_list", + hdrs = ["linked_list.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + ], +) + +cc_test( + name = "linked_list_test", + size = "small", + srcs = ["linked_list_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":linked_list", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "linux_syscall_support", + hdrs = ["linux_syscall_support.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_library( + name = "logging", + srcs = ["logging.cc"], + hdrs = ["logging.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":parameter_accessors", + "//tcmalloc:malloc_extension", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "logging_test", + srcs = ["logging_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "memory_stats", + srcs = ["memory_stats.cc"], + hdrs = ["memory_stats.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":logging", + ":util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "memory_stats_test", + srcs = ["memory_stats_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":memory_stats", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "mincore", + srcs = ["mincore.cc"], + hdrs = ["mincore.h"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + 
+cc_test( + name = "mincore_test", + srcs = ["mincore_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":logging", + ":mincore", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "parameter_accessors", + hdrs = ["parameter_accessors.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/types:optional", + ], +) + +cc_library( + name = "percpu", + srcs = [ + "percpu.cc", + "percpu_rseq_asm.S", + "percpu_rseq_unsupported.cc", + ], + hdrs = ["percpu.h"], + copts = TCMALLOC_DEFAULT_COPTS, + textual_hdrs = [ + "percpu_rseq_ppc.S", + "percpu_rseq_x86_64.S", + ], + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":atomic_danger", + ":config", + ":linux_syscall_support", + ":logging", + ":util", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + ], +) + +cc_library( + name = "proc_maps", + srcs = ["proc_maps.cc"], + hdrs = ["proc_maps.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "range_tracker", + hdrs = ["range_tracker.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":logging", + ], +) + +cc_test( + name = "range_tracker_test", + srcs = ["range_tracker_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":range_tracker", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_googletest//:gtest_main", + ], +) + +# An empty rule to force libc malloc instead of TCMalloc. +cc_library( + name = "system_malloc", + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc:__subpackages__", + ], +) + +cc_library( + name = "util", + srcs = ["util.cc"], + hdrs = ["util.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__subpackages__", + ], + deps = [ + ":logging", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + ], +) diff --git a/tcmalloc/internal/atomic_danger.h b/tcmalloc/internal/atomic_danger.h new file mode 100644 index 000000000..8367f0118 --- /dev/null +++ b/tcmalloc/internal/atomic_danger.h @@ -0,0 +1,56 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The routines exported by this module are subtle and dangerous. 
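+//
+// Rough usage sketch (illustrative only; the futex call and variable names
+// are assumptions, not part of this header):
+//
+//   std::atomic<int> word;
+//   int* raw = atomic_danger::CastToIntegral(&word);
+//   syscall(SYS_futex, raw, FUTEX_WAIT, /*val=*/0, /*timeout=*/nullptr);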
+ +#ifndef TCMALLOC_INTERNAL_ATOMIC_DANGER_H_ +#define TCMALLOC_INTERNAL_ATOMIC_DANGER_H_ + +#include +#include + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace atomic_danger { + +// Casts the address of a std::atomic to the address of an IntType. +// +// This is almost certainly not the function you are looking for! It is +// undefined behavior, as the object under a std::atomic isn't +// fundamentally an int. This function is intended for passing the address of an +// atomic integer to syscalls or for assembly interpretation. +// +// Callers should be migrated if C++ standardizes a better way to do this: +// * http://wg21.link/n4013 (Atomic operations on non-atomic data) +// * http://wg21.link/p0019 (Atomic Ref, merged into C++20) +// * http://wg21.link/p1478 (Byte-wise atomic memcpy) +template +IntType* CastToIntegral(std::atomic* atomic_for_syscall) { + static_assert(std::is_integral::value, + "CastToIntegral must be instantiated with an integral type."); +#if __cpp_lib_atomic_is_always_lock_free >= 201603 + static_assert(std::atomic::is_always_lock_free, + "CastToIntegral must be instantiated with a lock-free type."); +#else + static_assert(__atomic_always_lock_free(sizeof(IntType), + nullptr /* typical alignment */), + "CastToIntegral must be instantiated with a lock-free type."); +#endif + return reinterpret_cast(atomic_for_syscall); +} +} // namespace atomic_danger +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_ATOMIC_DANGER_H_ diff --git a/tcmalloc/internal/atomic_stats_counter.h b/tcmalloc/internal/atomic_stats_counter.h new file mode 100644 index 000000000..e20f5aad4 --- /dev/null +++ b/tcmalloc/internal/atomic_stats_counter.h @@ -0,0 +1,84 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ +#define TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ + +#include + +#include "absl/base/macros.h" + +namespace tcmalloc { + +class CentralFreeList; + +namespace tcmalloc_internal { + +class StatsCounter { + public: + constexpr StatsCounter() : value_(0) {} + StatsCounter(const StatsCounter&) = delete; + StatsCounter& operator=(const StatsCounter&) = delete; + + ~StatsCounter() = default; + + using Value = int64_t; + + // Add "increment" to this statistics counter. + // "increment" may take any value, including negative ones. + // Counts are not lost in the face of concurrent uses of Add(). + // Counts added by this call may be lost in the face of concurrent calls + // by other calls, such as Clear() or LossyAdd(). + // This call is suitable for maintaining statistics. It is not suitable + // for other purposes; in particular, it should not be used for + // data synchronization, generating sequence numbers, or reference counting. + void Add(Value increment) { + // As always, clients may not assume properties implied by the + // implementation, which may change. 
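+    // A relaxed fetch_add suffices here: Add() only promises that concurrent
+    // increments are not lost, not any ordering with respect to other memory
+    // operations (see the contract above).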
+ this->value_.fetch_add(increment, std::memory_order_relaxed); + } + + // Clear the counter to zero. Equivalent to atomically executing + // this->Add(-this->value()). + void Clear() { this->value_.store(0, std::memory_order_relaxed); } + + // Return the current value of the counter. + Value value() const { return this->value_.load(std::memory_order_relaxed); } + + // Add "increment" to this lossy statistics counter. Counts (including those + // added by other calls) _may be lost_ if this call is used concurrently with + // other calls to LossyAdd() or Add(). This call is suitable for maintaining + // statistics where performance is more important than not losing counts. It + // is not suitable for other purposes; in particular, it should not be used + // for data synchronization, generating sequence numbers, or reference + // counting. + void LossyAdd(Value increment) { + this->value_.store(this->value_.load(std::memory_order_relaxed) + increment, + std::memory_order_relaxed); + } + + private: + friend class tcmalloc::CentralFreeList; + + // Deprecated constructor, currently only used by tcmalloc. + explicit StatsCounter(absl::base_internal::LinkerInitialized x) { + } // value_ is zero + + std::atomic value_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ diff --git a/tcmalloc/internal/bits.h b/tcmalloc/internal/bits.h new file mode 100644 index 000000000..11c338f20 --- /dev/null +++ b/tcmalloc/internal/bits.h @@ -0,0 +1,57 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_BITS_H_ +#define TCMALLOC_INTERNAL_BITS_H_ + +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class Bits { + public: + static constexpr int Log2Floor(uint32_t n) { +#if defined(__GNUC__) + return n == 0 ? -1 : 31 ^ __builtin_clz(n); +#else + if (n == 0) return -1; + int log = 0; + uint32_t value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint32_t x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + ASSERT(value == 1); + return log; +#endif + } + + static constexpr int Log2Ceiling(uint32_t n) { + int floor = Log2Floor(n); + if ((n & (n - 1)) == 0) // zero or a power of two + return floor; + else + return floor + 1; + } +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_BITS_H_ diff --git a/tcmalloc/internal/bits_test.cc b/tcmalloc/internal/bits_test.cc new file mode 100644 index 000000000..99bd4db98 --- /dev/null +++ b/tcmalloc/internal/bits_test.cc @@ -0,0 +1,62 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/bits.h" + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(BitsTest, Log2EdgeCases) { + EXPECT_EQ(-1, Bits::Log2Floor(0)); + EXPECT_EQ(-1, Bits::Log2Ceiling(0)); + + for (int i = 0; i < 32; i++) { + uint32_t n = 1U << i; + EXPECT_EQ(i, Bits::Log2Floor(n)); + EXPECT_EQ(i, Bits::Log2Ceiling(n)); + if (n > 2) { + EXPECT_EQ(i - 1, Bits::Log2Floor(n - 1)); + EXPECT_EQ(i, Bits::Log2Floor(n + 1)); + EXPECT_EQ(i, Bits::Log2Ceiling(n - 1)); + EXPECT_EQ(i + 1, Bits::Log2Ceiling(n + 1)); + } + } +} + +TEST(BitsTest, Log2Random) { + absl::BitGen random; + + const int kNumIterations = 10000; + for (int i = 0; i < kNumIterations; i++) { + int maxbit = -1; + uint32_t n = 0; + while (!absl::Bernoulli(random, 1.0 / 32)) { + int bit = absl::Uniform(random, 0, 32); + n |= (1U << bit); + maxbit = std::max(bit, maxbit); + } + EXPECT_EQ(maxbit, Bits::Log2Floor(n)); + } +} + +} // namespace +} // nmaespace tcmalloc_internal +} // namespace tcmalloc diff --git a/tcmalloc/internal/config.h b/tcmalloc/internal/config.h new file mode 100644 index 000000000..a2ba16ab6 --- /dev/null +++ b/tcmalloc/internal/config.h @@ -0,0 +1,26 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_CONFIG_H_ +#define TCMALLOC_INTERNAL_CONFIG_H_ + +// TCMALLOC_HAVE_SCHED_GETCPU is defined when the system implements +// sched_getcpu(3) as by glibc and it's imitators. +#if defined(__linux__) || defined(__ros__) +#define TCMALLOC_HAVE_SCHED_GETCPU 1 +#else +#undef TCMALLOC_HAVE_SCHED_GETCPU +#endif + +#endif // TCMALLOC_INTERNAL_CONFIG_H_ diff --git a/tcmalloc/internal/declarations.h b/tcmalloc/internal/declarations.h new file mode 100644 index 000000000..b82a3ce9e --- /dev/null +++ b/tcmalloc/internal/declarations.h @@ -0,0 +1,42 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// These declarations are for internal use, allowing us to have access to +// allocation functions whose declarations are not provided by the standard +// library. +#ifndef TCMALLOC_INTERNAL_DECLARATIONS_H_ +#define TCMALLOC_INTERNAL_DECLARATIONS_H_ + +#include +#include + +namespace std { +enum class align_val_t : size_t; +} // namespace std + +void* operator new(std::size_t, std::align_val_t); +void* operator new(std::size_t, std::align_val_t, + const std::nothrow_t&) noexcept; +void* operator new[](std::size_t, std::align_val_t); +void* operator new[](std::size_t, std::align_val_t, + const std::nothrow_t&) noexcept; + +void operator delete(void*, std::align_val_t) noexcept; +void operator delete(void*, std::size_t) noexcept; +void operator delete(void*, std::size_t, std::align_val_t) noexcept; +void operator delete[](void*, std::align_val_t) noexcept; +void operator delete[](void*, std::size_t) noexcept; +void operator delete[](void*, std::size_t, std::align_val_t) noexcept; + +#endif // TCMALLOC_INTERNAL_DECLARATIONS_H_ diff --git a/tcmalloc/internal/linked_list.h b/tcmalloc/internal/linked_list.h new file mode 100644 index 000000000..a0bfabd22 --- /dev/null +++ b/tcmalloc/internal/linked_list.h @@ -0,0 +1,246 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Some very basic linked list functions for dealing with using void * as +// storage. + +#ifndef TCMALLOC_INTERNAL_LINKED_LIST_H_ +#define TCMALLOC_INTERNAL_LINKED_LIST_H_ + +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void *SLL_Next(void *t) { + return *(reinterpret_cast(t)); +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE SLL_SetNext(void *t, void *n) { + *(reinterpret_cast(t)) = n; +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE SLL_Push(void **list, void *element) { + SLL_SetNext(element, *list); + *list = element; +} + +inline void *SLL_Pop(void **list) { + void *result = *list; + void *next = SLL_Next(*list); + *list = next; + // Prefetching NULL leads to a DTLB miss, thus only prefetch when 'next' + // is not NULL. +#if defined(__GNUC__) + if (next) { + __builtin_prefetch(next, 0, 3); + } +#endif + return result; +} + +// LinkedList forms an in-place linked list with its void* elements. +class LinkedList { + private: + void *list_; // Linked list. + uint32_t length_; // Current length. + + public: + void Init() { + list_ = nullptr; + length_ = 0; + } + + // Return current length of list + size_t length() const { return length_; } + + // Is list empty? 
+ bool empty() const { return list_ == nullptr; } + + void ABSL_ATTRIBUTE_ALWAYS_INLINE Push(void *ptr) { + SLL_Push(&list_, ptr); + length_++; + } + + bool ABSL_ATTRIBUTE_ALWAYS_INLINE TryPop(void **ret) { + void *obj = list_; + if (ABSL_PREDICT_FALSE(obj == nullptr)) { + return false; + } + + void *next = SLL_Next(obj); + list_ = next; + length_--; + +#if defined(__GNUC__) + if (ABSL_PREDICT_TRUE(next)) { + __builtin_prefetch(next, 0, 0); + } +#endif + + *ret = obj; + return true; + } + + // PushBatch and PopBatch do not guarantee an ordering. + void PushBatch(int N, void **batch) { + ASSERT(N > 0); + for (int i = 0; i < N - 1; ++i) { + SLL_SetNext(batch[i], batch[i + 1]); + } + SLL_SetNext(batch[N - 1], list_); + list_ = batch[0]; + length_ += N; + } + + void PopBatch(int N, void **batch) { + void *p = list_; + for (int i = 0; i < N; ++i) { + batch[i] = p; + p = SLL_Next(p); + } + list_ = p; + ASSERT(length_ >= N); + length_ -= N; + } +}; + +// A well-typed intrusive doubly linked list. +template +class TList { + private: + class Iter; + + public: + // The intrusive element supertype. Use the CRTP to declare your class: + // class MyListItems : public TList::Elem { ... + class Elem { + friend class Iter; + friend class TList; + Elem *next_; + Elem *prev_; + + protected: + // Returns true iff the list is empty after removing this + bool remove() { + // Copy out next/prev before doing stores, otherwise compiler assumes + // potential aliasing and does unnecessary reloads after stores. + Elem *next = next_; + Elem *prev = prev_; + ASSERT(prev->next_ == this); + prev->next_ = next; + ASSERT(next->prev_ == this); + next->prev_ = prev; +#ifndef NDEBUG + prev_ = nullptr; + next_ = nullptr; +#endif + return next == prev; + } + + void prepend(Elem *item) { + Elem *prev = prev_; + item->prev_ = prev; + item->next_ = this; + prev->next_ = item; + prev_ = item; + } + + void append(Elem *item) { + Elem *next = next_; + item->next_ = next; + item->prev_ = this; + next->prev_ = item; + next_ = item; + } + }; + + // Initialize to empty list. + void Init() { head_.next_ = head_.prev_ = &head_; } + + bool empty() const { return head_.next_ == &head_; } + + // Return the length of the linked list. O(n). + size_t length() const { + size_t result = 0; + for (Elem *e = head_.next_; e != &head_; e = e->next_) { + result++; + } + return result; + } + + // Returns first element in the list. The list must not be empty. + T *first() const { + ASSERT(!empty()); + return static_cast(head_.next_); + } + + // Returns last element in the list. The list must not be empty. + T *last() const { + ASSERT(!empty()); + return static_cast(head_.prev_); + } + + // Add item to the front of list. + void prepend(T *item) { head_.append(item); } + + void append(T *item) { head_.prepend(item); } + + bool remove(T *item) { + // must be on the list; we don't check. + return item->remove(); + } + + // Support for range-based iteration over a list. + Iter begin() const { return Iter(head_.next_); } + Iter end() const { return Iter(const_cast(&head_)); } + + // Iterator pointing to a given list item. + // REQUIRES: item is a member of the list. + Iter at(T *item) const { return Iter(item); } + + private: + // Support for range-based iteration over a list. 
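+  // Illustrative usage (MySpan is a hypothetical CRTP element type):
+  //
+  //   TList<MySpan> list;
+  //   list.Init();
+  //   list.append(span);            // span is some existing MySpan*
+  //   for (MySpan* s : list) { /* visit s */ }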
+ class Iter { + friend class TList; + Elem *elem_; + explicit Iter(Elem *elem) : elem_(elem) {} + + public: + Iter &operator++() { + elem_ = elem_->next_; + return *this; + } + Iter &operator--() { + elem_ = elem_->prev_; + return *this; + } + + bool operator!=(Iter other) const { return elem_ != other.elem_; } + bool operator==(Iter other) const { return elem_ == other.elem_; } + T *operator*() const { return static_cast(elem_); } + T *operator->() const { return static_cast(elem_); } + }; + friend class Iter; + + Elem head_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_LINKED_LIST_H_ diff --git a/tcmalloc/internal/linked_list_test.cc b/tcmalloc/internal/linked_list_test.cc new file mode 100644 index 000000000..750c7a824 --- /dev/null +++ b/tcmalloc/internal/linked_list_test.cc @@ -0,0 +1,253 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/linked_list.h" + +#include + +#include +#include + +#include "benchmark/benchmark.h" +#include "gtest/gtest.h" +#include "absl/container/node_hash_set.h" +#include "absl/random/random.h" + +namespace tcmalloc { +namespace { + +class LinkedListTest : public ::testing::Test { + protected: + void SetUp() override { list_.Init(); } + + LinkedList list_; +}; + +TEST_F(LinkedListTest, PushPop) { + const int N = 20; + std::vector ptrs{nullptr}; + + EXPECT_EQ(0, list_.length()); + EXPECT_TRUE(list_.empty()); + + for (int i = 0; i < N; i++) { + void* ptr = malloc(sizeof(void*)); + ASSERT_FALSE(ptr == nullptr); + ptrs.push_back(ptr); + + list_.Push(ptr); + + EXPECT_EQ(i + 1, list_.length()); + EXPECT_FALSE(list_.empty()); + } + + for (int i = N; i > 0; i--) { + EXPECT_EQ(i, list_.length()); + EXPECT_FALSE(list_.empty()); + + void* ptr; + bool ret = list_.TryPop(&ptr); + EXPECT_TRUE(ret); + EXPECT_EQ(ptrs[i], ptr); + + free(ptrs[i]); + } + + EXPECT_EQ(0, list_.length()); + EXPECT_TRUE(list_.empty()); +} + +// PushPopBatch validates that the batch operations push and pop the required +// number of elements from the list, but it does not assert that order within +// the batch is maintained. 
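+// (PushBatch splices a batch onto the front of the list and PopBatch takes
+// whatever is at the front, so elements of different batches may interleave;
+// hence the test only compares the pushed and popped *sets*.)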
+TEST_F(LinkedListTest, PushPopBatch) { + const std::vector batch_sizes{1, 3, 5, 7, 10, 16}; + absl::node_hash_set pushed; + + size_t length = 0; + for (int batch_size : batch_sizes) { + std::vector batch; + + for (int i = 0; i < batch_size; i++) { + void* ptr = malloc(sizeof(void*)); + ASSERT_FALSE(ptr == nullptr); + batch.push_back(ptr); + pushed.insert(ptr); + } + + list_.PushBatch(batch_size, batch.data()); + length += batch_size; + + EXPECT_EQ(length, list_.length()); + EXPECT_EQ(length == 0, list_.empty()); + } + + absl::node_hash_set popped; + for (int batch_size : batch_sizes) { + std::vector batch(batch_size, nullptr); + list_.PopBatch(batch_size, batch.data()); + length -= batch_size; + + popped.insert(batch.begin(), batch.end()); + EXPECT_EQ(length, list_.length()); + EXPECT_EQ(length == 0, list_.empty()); + } + + EXPECT_EQ(pushed, popped); + + for (void* ptr : pushed) { + free(ptr); + } +} + +class MockSpan; +typedef TList MockSpanList; + +class MockSpan : public MockSpanList::Elem { + public: + MockSpan() {} + + static MockSpan* New(int idx = 0) { + MockSpan* ret = new MockSpan(); + ret->index_ = idx; + return ret; + } + + int index_; +}; + +class TListTest : public ::testing::Test { + protected: + void SetUp() override { list_.Init(); } + + MockSpanList list_; +}; + +TEST_F(TListTest, AppendPushPop) { + const int N = 20; + + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); + + // Append N elements to the list. + for (int i = 0; i < N; i++) { + MockSpan* s = MockSpan::New(i); + ASSERT_FALSE(s == nullptr); + list_.append(s); + EXPECT_EQ(list_.first()->index_, 0); + EXPECT_EQ(list_.last()->index_, i); + + EXPECT_EQ(list_.length(), i + 1); + EXPECT_FALSE(list_.empty()); + } + + // Remove all N elements from the end of the list. + for (int i = N; i > 0; i--) { + EXPECT_EQ(list_.length(), i); + EXPECT_FALSE(list_.empty()); + + MockSpan* last = list_.last(); + EXPECT_EQ(list_.first()->index_, 0); + EXPECT_EQ(list_.last()->index_, i - 1); + + EXPECT_FALSE(last == nullptr); + bool ret = list_.remove(last); + // Returns true iff the list is empty after the remove. + EXPECT_EQ(ret, i == 1); + + delete last; + } + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); +} + +TEST_F(TListTest, PrependPushPop) { + const int N = 20; + + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); + + // Prepend N elements to the list. + for (int i = 0; i < N; i++) { + MockSpan* s = MockSpan::New(i); + ASSERT_FALSE(s == nullptr); + list_.prepend(s); + EXPECT_EQ(list_.first()->index_, i); + EXPECT_EQ(list_.last()->index_, 0); + + EXPECT_EQ(list_.length(), i + 1); + EXPECT_FALSE(list_.empty()); + } + + // Check range iterator + { + int x = N - 1; + for (const MockSpan* s : list_) { + EXPECT_EQ(s->index_, x); + x--; + } + } + + // Remove all N elements from the front of the list. + for (int i = N; i > 0; i--) { + EXPECT_EQ(list_.length(), i); + EXPECT_FALSE(list_.empty()); + + MockSpan* first = list_.first(); + EXPECT_EQ(list_.first()->index_, i - 1); + EXPECT_EQ(list_.last()->index_, 0); + + EXPECT_FALSE(first == nullptr); + bool ret = list_.remove(first); + // Returns true iff the list is empty after the remove. + EXPECT_EQ(ret, i == 1); + + delete first; + } + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); +} + +TEST_F(TListTest, AppendRandomRemove) { + const int N = 100; + std::vector v(N); + + // Append N elements to the list. 
+ for (int i = 0; i < N; i++) { + MockSpan* s = MockSpan::New(i); + ASSERT_FALSE(s == nullptr); + v[i] = s; + list_.append(s); + } + + // Remove all N elements from the list in a random order + std::shuffle(v.begin(), v.end(), absl::BitGen()); + int i = N; + for (MockSpan* s : v) { + EXPECT_EQ(list_.length(), i); + EXPECT_FALSE(list_.empty()); + + bool ret = list_.remove(s); + // Returns true iff the list is empty after the remove. + EXPECT_EQ(ret, i == 1); + + delete s; + i--; + } + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/internal/linux_syscall_support.h b/tcmalloc/internal/linux_syscall_support.h new file mode 100644 index 000000000..68dea77ee --- /dev/null +++ b/tcmalloc/internal/linux_syscall_support.h @@ -0,0 +1,52 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ +#define TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ + +/* include/uapi/linux/rseq.h */ + +struct kernel_rseq { + unsigned cpu_id_start; + unsigned cpu_id; + unsigned long long rseq_cs; + unsigned flags; +} __attribute__((aligned(4 * sizeof(unsigned long long)))); + +static_assert(sizeof(kernel_rseq) == (4 * sizeof(unsigned long long)), + "Unexpected size for rseq structure"); + +struct kernel_rseq_cs { + unsigned version; + unsigned flags; + unsigned long long start_ip; + unsigned long long post_commit_offset; + unsigned long long abort_ip; + // This is aligned, per upstream RSEQ specification. +} __attribute__((aligned(4 * sizeof(unsigned long long)))); + +static_assert(sizeof(kernel_rseq_cs) == (4 * sizeof(unsigned long long)), + "Unexpected size for rseq_cs structure"); + +#if !defined(__NR_rseq) +#if defined(__x86_64__) +#define __NR_rseq 334 +#elif defined(__aarch64__) +#define __NR_rseq 398 +#elif defined(__PPC__) +#define __NR_rseq 387 +#endif +#endif + +#endif // TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ diff --git a/tcmalloc/internal/logging.cc b/tcmalloc/internal/logging.cc new file mode 100644 index 000000000..5c008340e --- /dev/null +++ b/tcmalloc/internal/logging.cc @@ -0,0 +1,273 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/logging.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/debugging/stacktrace.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/malloc_extension.h" + +// Variables for storing crash output. Allocated statically since we +// may not be able to heap-allocate while crashing. +static absl::base_internal::SpinLock crash_lock( + absl::base_internal::kLinkerInitialized); +static bool crashed = false; + +static const size_t kStatsBufferSize = 16 << 10; +static char stats_buffer[kStatsBufferSize] = { 0 }; + +namespace tcmalloc { + +static void WriteMessage(const char* msg, int length) { + syscall(SYS_write, STDERR_FILENO, msg, length); +} + +void (*log_message_writer)(const char* msg, int length) = WriteMessage; + + +class Logger { + public: + bool Add(const LogItem& item); + bool AddStr(const char* str, int n); + bool AddNum(uint64_t num, int base); // base must be 10 or 16. + + static const int kBufSize = 200; + char* p_; + char* end_; + char buf_[kBufSize]; +}; + +ABSL_ATTRIBUTE_NOINLINE +void Log(LogMode mode, const char* filename, int line, + LogItem a, LogItem b, LogItem c, LogItem d) { + Logger state; + state.p_ = state.buf_; + state.end_ = state.buf_ + sizeof(state.buf_); + state.AddStr(filename, strlen(filename)) + && state.AddStr(":", 1) + && state.AddNum(line, 10) + && state.AddStr("]", 1) + && state.Add(a) + && state.Add(b) + && state.Add(c) + && state.Add(d); + + const bool crash = (mode == kCrash || mode == kCrashWithStats); + StackTrace t; + if (crash || mode == kLogWithStack) { + t.depth = absl::GetStackTrace(t.stack, tcmalloc::kMaxStackDepth, 1); + state.Add(LogItem("@")); + for (int i = 0; i < t.depth; i++) { + state.Add(LogItem(t.stack[i])); + } + } + + // Teminate with newline + if (state.p_ >= state.end_) { + state.p_ = state.end_ - 1; + } + *state.p_ = '\n'; + state.p_++; + + int msglen = state.p_ - state.buf_; + if (!crash) { + (*log_message_writer)(state.buf_, msglen); + return; + } + + // FailureSignalHandler mallocs for various logging attempts. + // We might be crashing holding tcmalloc locks. + // We're substantially less likely to try to take those locks + // (and thus deadlock until the alarm timer fires) if we disable sampling. + if (TCMalloc_Internal_SetProfileSamplingRate != nullptr) { + TCMalloc_Internal_SetProfileSamplingRate(0); + } + + bool first_crash = false; + { + absl::base_internal::SpinLockHolder l(&crash_lock); + if (!crashed) { + crashed = true; + first_crash = true; + } + } + + (*log_message_writer)(state.buf_, msglen); + if (first_crash && mode == kCrashWithStats) { + if (&TCMalloc_Internal_GetStats != nullptr) { + size_t n = TCMalloc_Internal_GetStats(stats_buffer, kStatsBufferSize); + (*log_message_writer)(stats_buffer, std::min(n, kStatsBufferSize)); + } + } + + abort(); +} + +bool Logger::Add(const LogItem& item) { + // Separate real items with spaces + if (item.tag_ != LogItem::kEnd && p_ < end_) { + *p_ = ' '; + p_++; + } + + switch (item.tag_) { + case LogItem::kStr: + return AddStr(item.u_.str, strlen(item.u_.str)); + case LogItem::kUnsigned: + return AddNum(item.u_.unum, 10); + case LogItem::kSigned: + if (item.u_.snum < 0) { + // The cast to uint64_t is intentionally before the negation + // so that we do not attempt to negate -2^63. 
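+        // E.g. for snum == INT64_MIN the cast yields 2^63; the unsigned
+        // negation (well defined, modulo 2^64) yields 2^63 again, and we
+        // print "-9223372036854775808" with no signed overflow.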
+ return AddStr("-", 1) + && AddNum(- static_cast(item.u_.snum), 10); + } else { + return AddNum(static_cast(item.u_.snum), 10); + } + case LogItem::kPtr: + return AddStr("0x", 2) + && AddNum(reinterpret_cast(item.u_.ptr), 16); + default: + return false; + } +} + +bool Logger::AddStr(const char* str, int n) { + if (end_ - p_ < n) { + return false; + } else { + memcpy(p_, str, n); + p_ += n; + return true; + } +} + +bool Logger::AddNum(uint64_t num, int base) { + static const char kDigits[] = "0123456789abcdef"; + char space[22]; // more than enough for 2^64 in smallest supported base (10) + char* end = space + sizeof(space); + char* pos = end; + do { + pos--; + *pos = kDigits[num % base]; + num /= base; + } while (num > 0 && pos > space); + return AddStr(pos, end - pos); +} + +} // namespace tcmalloc + +void TCMalloc_Printer::printf(const char* format, ...) { + ASSERT(left_ >= 0); + va_list ap; + va_start(ap, format); + const int r = vsnprintf(buf_, left_, format, ap); + va_end(ap); + if (r < 0) { + // Perhaps an old glibc that returns -1 on truncation? We can't draw + // conclusions on how this affects the required buffer space. + left_ = 0; + return; + } + + required_ += r; + + if (r > left_) { + // Truncation + left_ = 0; + } else { + left_ -= r; + buf_ += r; + } +} + +PbtxtRegion::PbtxtRegion(TCMalloc_Printer* out, PbtxtRegionType type, + int indent) + : out_(out), type_(type), indent_(indent) { + switch (type_) { + case kTop: + break; + case kNested: + out_->printf("{"); + break; + } + ++indent_; +} + +PbtxtRegion::~PbtxtRegion() { + --indent_; + out_->printf("\n"); + for (int i = 0; i < indent_; i++) { + out_->printf(" "); + } + switch (type_) { + case kTop: + break; + case kNested: + out_->printf("}"); + break; + } +} + +void PbtxtRegion::NewLineAndIndent() { + out_->printf("\n"); + for (int i = 0; i < indent_; i++) { + out_->printf(" "); + } +} + +void PbtxtRegion::PrintU64(absl::string_view key, uint64_t value) { + NewLineAndIndent(); + out_->printf("%s: %" PRIu64, key.data(), value); +} + +void PbtxtRegion::PrintI64(absl::string_view key, int64_t value) { + NewLineAndIndent(); + out_->printf("%s: %" PRIi64, key.data(), value); +} + +void PbtxtRegion::PrintDouble(absl::string_view key, double value) { + NewLineAndIndent(); + out_->printf("%s: %.3g", key.data(), value); +} + +void PbtxtRegion::PrintBool(absl::string_view key, bool value) { + NewLineAndIndent(); + out_->printf("%s: %s", key.data(), value ? "true" : "false"); +} + +void PbtxtRegion::PrintRaw(absl::string_view key, absl::string_view value) { + NewLineAndIndent(); + out_->printf("%s: %s", key.data(), value.data()); +} + +PbtxtRegion PbtxtRegion::CreateSubRegion(absl::string_view key) { + NewLineAndIndent(); + out_->printf("%s ", key.data()); + PbtxtRegion sub(out_, kNested, indent_); + return sub; +} diff --git a/tcmalloc/internal/logging.h b/tcmalloc/internal/logging.h new file mode 100644 index 000000000..e4317eef0 --- /dev/null +++ b/tcmalloc/internal/logging.h @@ -0,0 +1,209 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Internal logging and related utility routines. + +#ifndef TCMALLOC_INTERNAL_LOGGING_H_ +#define TCMALLOC_INTERNAL_LOGGING_H_ + +#include + +#include "absl/base/optimization.h" +#include "absl/strings/string_view.h" + +//------------------------------------------------------------------- +// Utility routines +//------------------------------------------------------------------- + +// Safe logging helper: we write directly to the stderr file +// descriptor and avoid FILE buffering because that may invoke +// malloc(). +// +// Example: +// Log(kLog, __FILE__, __LINE__, "error", bytes); + +namespace tcmalloc { + +static constexpr int kMaxStackDepth = 64; + +// size/depth are made the same size as a pointer so that some generic +// code below can conveniently cast them back and forth to void*. +struct StackTrace { + + // For small sampled objects, we allocate a full span to hold the + // sampled object. However to avoid disturbing fragmentation + // profiles, in such cases we also allocate a small proxy object + // using the normal mechanism. + // + // proxy field is defined only for heap sample stack traces. + // For heap samples, proxy==NULL iff size > kMaxSize. + void* proxy; + + uintptr_t requested_size; + uintptr_t requested_alignment; + uintptr_t allocated_size; // size after sizeclass/page rounding + uintptr_t depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; + + // weight is the expected number of *bytes* that were requested + // between the previous sample and this one + size_t weight; + + template + friend H AbslHashValue(H h, const StackTrace& t) { + // As we use StackTrace as a key-value node in StackTraceTable, we only + // produce a hasher for the fields used as keys. + return H::combine(H::combine_contiguous(std::move(h), t.stack, t.depth), + t.depth, t.requested_size, t.requested_alignment, + t.allocated_size); + } +}; + +enum LogMode { + kLog, // Just print the message + kLogWithStack, // Print the message and a stack trace + kCrash, // Print the message and crash + kCrashWithStats // Print the message, some stats, and crash +}; + +class Logger; + +// A LogItem holds any of the argument types that can be passed to Log() +class LogItem { + public: + LogItem() : tag_(kEnd) { } + LogItem(const char* v) : tag_(kStr) { u_.str = v; } + LogItem(int v) : tag_(kSigned) { u_.snum = v; } + LogItem(long v) : tag_(kSigned) { u_.snum = v; } + LogItem(long long v) : tag_(kSigned) { u_.snum = v; } + LogItem(unsigned int v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(const void* v) : tag_(kPtr) { u_.ptr = v; } + + private: + friend class Logger; + enum Tag { + kStr, + kSigned, + kUnsigned, + kPtr, + kEnd + }; + Tag tag_; + union { + const char* str; + const void* ptr; + int64_t snum; + uint64_t unum; + } u_; +}; + +extern void Log(LogMode mode, const char* filename, int line, + LogItem a, LogItem b = LogItem(), + LogItem c = LogItem(), LogItem d = LogItem()); + +// Tests can override this function to collect logging messages. +extern void (*log_message_writer)(const char* msg, int length); + +} // namespace tcmalloc + +// Like assert(), but executed even in NDEBUG mode +#undef CHECK_CONDITION +#define CHECK_CONDITION(cond) \ + (ABSL_PREDICT_TRUE(cond) \ + ? 
(void)0 \ + : (::tcmalloc::Log(::tcmalloc::kCrash, __FILE__, __LINE__, #cond), \ + __builtin_unreachable())) + +// Our own version of assert() so we can avoid hanging by trying to do +// all kinds of goofy printing while holding the malloc lock. +#ifndef NDEBUG +#define ASSERT(cond) CHECK_CONDITION(cond) +#else +#define ASSERT(cond) ((void) 0) +#endif + +// Our wrapper for __builtin_assume, allowing us to check the assumption on +// debug builds. +#ifndef NDEBUG +#ifdef __clang__ +#define ASSUME(cond) CHECK_CONDITION(cond), __builtin_assume(cond) +#else +#define ASSUME(cond) \ + CHECK_CONDITION(cond), (!(cond) ? __builtin_unreachable() : (void)0) +#endif +#else +#ifdef __clang__ +#define ASSUME(cond) __builtin_assume(cond) +#else +#define ASSUME(cond) (!(cond) ? __builtin_unreachable() : (void)0) +#endif +#endif + +// Print into buffer +class TCMalloc_Printer { + private: + char* buf_; // Where should we write next + int left_; // Space left in buffer (including space for \0) + int required_; // Space we needed to complete all printf calls up to this + // point + + public: + // REQUIRES: "length > 0" + TCMalloc_Printer(char* buf, int length) + : buf_(buf), left_(length), required_(1) { + ASSERT(length > 0); + buf[0] = '\0'; + } + + void printf(const char* format, ...) + __attribute__ ((__format__ (__printf__, 2, 3))); + + int SpaceRequired() const { return required_; } +}; + +enum PbtxtRegionType { kTop, kNested }; + +// A helper class that prints pbtxt via RAII. A pbtxt region can be either a +// top region (with no brackets) or a nested region (enclosed by curly +// brackets). +class PbtxtRegion { + public: + PbtxtRegion(TCMalloc_Printer* out, PbtxtRegionType type, int indent); + ~PbtxtRegion(); + + PbtxtRegion(const PbtxtRegion&) = delete; + PbtxtRegion(PbtxtRegion&&) = default; + + // Prints 'key: value'. + void PrintU64(absl::string_view key, uint64_t value); + void PrintI64(absl::string_view key, int64_t value); + void PrintDouble(absl::string_view key, double value); + void PrintBool(absl::string_view key, bool value); + // Useful for enums. + void PrintRaw(absl::string_view key, absl::string_view value); + + // Prints 'key subregion'. Return the created subregion. + PbtxtRegion CreateSubRegion(absl::string_view key); + + private: + void NewLineAndIndent(); + + TCMalloc_Printer* out_; + PbtxtRegionType type_; + int indent_; +}; + +#endif // TCMALLOC_INTERNAL_LOGGING_H_ diff --git a/tcmalloc/internal/logging_test.cc b/tcmalloc/internal/logging_test.cc new file mode 100644 index 000000000..68287e77a --- /dev/null +++ b/tcmalloc/internal/logging_test.cc @@ -0,0 +1,109 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
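+// Illustrative sketch of the printing helpers declared in logging.h (buffer
+// size and field names here are chosen for exposition only):
+//
+//   char buf[1 << 12];
+//   TCMalloc_Printer printer(buf, sizeof(buf));
+//   {
+//     PbtxtRegion top(&printer, kTop, /*indent=*/0);
+//     top.PrintI64("bytes_in_use", 42);
+//     PbtxtRegion limits = top.CreateSubRegion("limits");
+//     limits.PrintBool("hard", false);
+//   }  // The destructors emit the trailing newlines and closing brace.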
+ +#include "tcmalloc/internal/logging.h" + +#include + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +static std::string* log_buffer; + +namespace tcmalloc { + +static void RecordLogMessage(const char* msg, int length) { + // Make tests less brittle by trimming trailing whitespace + while (length > 0 && (msg[length - 1] == ' ' || msg[length - 1] == '\n')) { + length--; + } + log_buffer->assign(msg, length); +} + +TEST(InternalLogging, MessageFormatting) { + std::string long_string; + for (int i = 0; i < 100; i++) { + long_string += "the quick brown fox jumped over the lazy dog"; + } + + // Arrange to intercept Log() output + log_buffer = new std::string(); + void (*old_writer)(const char*, int) = log_message_writer; + log_message_writer = RecordLogMessage; + + Log(kLog, "foo.cc", 100, "Hello"); + EXPECT_EQ("foo.cc:100] Hello", *log_buffer); + + Log(kLog, "foo.cc", 100, 123u, -456, 0); + EXPECT_EQ("foo.cc:100] 123 -456 0", *log_buffer); + + Log(kLog, "foo.cc", 100, 123u, std::numeric_limits::min()); + EXPECT_EQ("foo.cc:100] 123 -9223372036854775808", *log_buffer); + + Log(kLog, "foo.cc", 2, + reinterpret_cast(static_cast(1025))); + EXPECT_EQ("foo.cc:2] 0x401", *log_buffer); + + Log(kLog, "foo.cc", 10, "hello", long_string.c_str()); + EXPECT_EQ("foo.cc:10] hello", *log_buffer); + + Log(kLogWithStack, "foo.cc", 10, "stk"); + EXPECT_TRUE(strstr(log_buffer->c_str(), "stk @ 0x") != nullptr) + << *log_buffer; + + log_message_writer = old_writer; + delete log_buffer; +} + +TEST(InternalLogging, Assert) { + CHECK_CONDITION((2 + 2) == 4); + + if (false) + CHECK_CONDITION(false); + else + CHECK_CONDITION(true); + + ASSERT_DEATH(CHECK_CONDITION((2 + 2) == 5), + ".*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] " + "\\(2 \\+ 2\\) == 5 @( 0x[0-9a-f]+)+"); +} + +TEST(Printer, RequiredSpace) { + const char kChunk[] = "0123456789"; + std::string expected; + + for (int i = 0; i < 10; i++) { + int length = strlen(kChunk) * i + 1; + std::unique_ptr buf(new char[length]); + TCMalloc_Printer printer(buf.get(), length); + + for (int j = 0; j < i; j++) { + printer.printf("%s", kChunk); + } + EXPECT_EQ(buf.get(), expected); + EXPECT_EQ(length, printer.SpaceRequired()); + + // Go past the end of the buffer. This should not overrun or affect the + // existing contents of buf, but we should see SpaceRequired tick up. + printer.printf("%s", kChunk); + EXPECT_EQ(buf.get(), expected); + EXPECT_EQ(length + strlen(kChunk), printer.SpaceRequired()); + + expected.append(kChunk); + } +} + +} // namespace tcmalloc diff --git a/tcmalloc/internal/memory_stats.cc b/tcmalloc/internal/memory_stats.cc new file mode 100644 index 000000000..c7e7a40d4 --- /dev/null +++ b/tcmalloc/internal/memory_stats.cc @@ -0,0 +1,129 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
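+// The SpaceRequired() behavior exercised in logging_test.cc above supports a
+// measure-then-allocate pattern (sketch; names are illustrative):
+//
+//   char probe[1];
+//   TCMalloc_Printer counter(probe, sizeof(probe));
+//   counter.printf("rss: %d", rss);                  // Truncated, but...
+//   std::string out(counter.SpaceRequired(), '\0');  // ...fully counted.
+//   TCMalloc_Printer printer(&out[0], out.size());
+//   printer.printf("rss: %d", rss);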
+ +#include "tcmalloc/internal/memory_stats.h" + +#include +#include +#include +#include + +#include "absl/strings/numbers.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +namespace { + +struct FDCloser { + FDCloser() : fd(-1) {} + ~FDCloser() { + if (fd != -1) { + signal_safe_close(fd); + } + } + int fd; +}; + +} // namespace + +bool GetMemoryStats(MemoryStats* stats) { +#if !defined(__linux__) + return false; +#endif + + FDCloser fd; + fd.fd = signal_safe_open("/proc/self/statm", O_RDONLY | O_CLOEXEC); + ASSERT(fd.fd >= 0); + if (fd.fd < 0) { + return false; + } + + char buf[1024]; + ssize_t rc = signal_safe_read(fd.fd, buf, sizeof(buf), nullptr); + ASSERT(rc >= 0); + ASSERT(rc < sizeof(buf)); + if (rc < 0 || rc >= sizeof(buf)) { + return false; + } + buf[rc] = '\0'; + + const size_t pagesize = getpagesize(); + absl::string_view contents(buf, rc); + absl::string_view::size_type start = 0; + int index = 0; + do { + auto end = contents.find(' ', start); + + absl::string_view value; + if (end == absl::string_view::npos) { + value = contents.substr(start); + } else { + value = contents.substr(start, end - start); + } + + int64_t parsed; + if (!absl::SimpleAtoi(value, &parsed)) { + return false; + } + + // Fields in /proc/self/statm: + // [0] = vss + // [1] = rss + // [2] = shared + // [3] = code + // [4] = unused + // [5] = data + stack + // [6] = unused + switch (index) { + case 0: + stats->vss = parsed * pagesize; + break; + case 1: + stats->rss = parsed * pagesize; + break; + case 2: + stats->shared = parsed * pagesize; + break; + case 3: + stats->code = parsed * pagesize; + break; + case 5: + stats->data = parsed * pagesize; + break; + case 4: + case 6: + default: + // Unused + break; + } + + if (end == absl::string_view::npos) { + break; + } + + start = end + 1; + } while (start < contents.size() && index++ < 6); + + if (index < 6) { + return false; + } + + return true; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/tcmalloc/internal/memory_stats.h b/tcmalloc/internal/memory_stats.h new file mode 100644 index 000000000..ece44d755 --- /dev/null +++ b/tcmalloc/internal/memory_stats.h @@ -0,0 +1,37 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
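+// Typical use of GetMemoryStats(), declared below (sketch):
+//
+//   tcmalloc::tcmalloc_internal::MemoryStats stats;
+//   if (tcmalloc::tcmalloc_internal::GetMemoryStats(&stats)) {
+//     // Every field is in bytes: the page counts read from
+//     // /proc/self/statm are scaled by getpagesize().
+//     tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, "rss", stats.rss);
+//   }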
+ +#ifndef TCMALLOC_INTERNAL_MEMORY_STATS_H_ +#define TCMALLOC_INTERNAL_MEMORY_STATS_H_ + +#include + +namespace tcmalloc { +namespace tcmalloc_internal { + +struct MemoryStats { + int64_t vss; + int64_t rss; + int64_t shared; + int64_t code; + int64_t data; +}; + +// Memory stats of a process +bool GetMemoryStats(MemoryStats* stats); + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_MEMORY_STATS_H_ diff --git a/tcmalloc/internal/memory_stats_test.cc b/tcmalloc/internal/memory_stats_test.cc new file mode 100644 index 000000000..176c71273 --- /dev/null +++ b/tcmalloc/internal/memory_stats_test.cc @@ -0,0 +1,43 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/memory_stats.h" + +#include + +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(Stats, ValidRanges) { + MemoryStats stats; +#if defined(__linux__) + ASSERT_TRUE(GetMemoryStats(&stats)); +#else + ASSERT_FALSE(GetMemoryStats(&stats)); + return; +#endif + + EXPECT_GT(stats.vss, 0); + EXPECT_GT(stats.rss, 0); + EXPECT_GT(stats.shared, 0); + EXPECT_GT(stats.code, 0); + EXPECT_GT(stats.data, 0); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/tcmalloc/internal/mincore.cc b/tcmalloc/internal/mincore.cc new file mode 100644 index 000000000..a5acce506 --- /dev/null +++ b/tcmalloc/internal/mincore.cc @@ -0,0 +1,120 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/mincore.h" + +#include +#include + +#include +#include + +namespace tcmalloc { + +// Class that implements the call into the OS provided mincore() function. +class OsMInCore : public MInCoreInterface { + int mincore(void* addr, size_t length, unsigned char* result) final { + return ::mincore(addr, length, result); + } +}; + +// Returns the number of resident bytes for an range of memory of arbitrary +// alignment and size. +size_t MInCore::residence_impl(void* addr, size_t size, + MInCoreInterface* mincore) { + if (size == 0) { + return 0; + } + unsigned char res[kArrayLength]; + const size_t kPageSize = getpagesize(); + + uintptr_t uaddr = reinterpret_cast(addr); + // Round address down to get the start of the page containing the data. + uintptr_t basePage = uaddr & ~(kPageSize - 1); + // Round end address up to get the end of the page containing the data. 
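+ // For example, with 4 KiB pages, addr == 0x12345 and size == 0x100 give
+ // basePage == 0x12000 and endPage == 0x13000, i.e. the one page that
+ // holds the entire object.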
+ uintptr_t endPage = (uaddr + size + kPageSize - 1) & ~(kPageSize - 1); + + uintptr_t remainingPages = endPage - basePage; + + // We need to handle the first and last pages differently. Most pages + // will contribute pagesize bytes to residence, but the first and last + // pages will contribute fewer than that. Easiest way to do this is to + // handle the special case where the entire object fits into a page, + // then handle the case where the object spans more than one page. + if (remainingPages == kPageSize) { + // Find out whether the first page is resident. + mincore->mincore(reinterpret_cast(basePage), remainingPages, res); + // Residence info is returned in LSB, other bits are undefined. + if ((res[0] & 1) == 1) { + return size; + } + return 0; + } + + // We're calling this outside the loop so that we can get info for the + // first page, deal with subsequent pages in the loop, and then handle + // the last page after the loop. + size_t scanLength = std::min(remainingPages, kPageSize * kArrayLength); + if (mincore->mincore(reinterpret_cast(basePage), scanLength, res) != + 0) { + return 0; + } + + size_t totalResident = 0; + + // Handle the first page. + size_t firstPageSize = kPageSize - (uaddr - basePage); + if ((res[0] & 1) == 1) { + totalResident += firstPageSize; + } + basePage += kPageSize; + remainingPages -= kPageSize; + + int resIndex = 1; + + // Handle all pages but the last page. + while (remainingPages > kPageSize) { + if ((res[resIndex] & 1) == 1) { + totalResident += kPageSize; + } + resIndex++; + basePage += kPageSize; + remainingPages -= kPageSize; + // Refresh the array if necessary. + if (resIndex == kArrayLength) { + resIndex = 0; + scanLength = std::min(remainingPages, kPageSize * kArrayLength); + if (mincore->mincore(reinterpret_cast(basePage), scanLength, + res) != 0) { + return 0; + } + } + } + + // Check final page + size_t lastPageSize = kPageSize - (endPage - uaddr - size); + if ((res[resIndex] & 1) == 1) { + totalResident += lastPageSize; + } + + return totalResident; +} + +// Return residence info using call to OS provided mincore(). +size_t MInCore::residence(void* addr, size_t size) { + OsMInCore mc; + return residence_impl(addr, size, &mc); +} + +} // End namespace tcmalloc diff --git a/tcmalloc/internal/mincore.h b/tcmalloc/internal/mincore.h new file mode 100644 index 000000000..0696af3b5 --- /dev/null +++ b/tcmalloc/internal/mincore.h @@ -0,0 +1,59 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_MINCORE_H_ +#define TCMALLOC_INTERNAL_MINCORE_H_ + +#include + +namespace tcmalloc { + +// Class to wrap mincore so that we can replace it for testing. 
+class MInCoreInterface { + public: + MInCoreInterface() {} + virtual ~MInCoreInterface() {} + virtual int mincore(void* addr, size_t length, unsigned char* result) = 0; + + private: + MInCoreInterface(const MInCoreInterface&) = delete; + MInCoreInterface& operator=(const MInCoreInterface&) = delete; +}; + +// The MInCore class through the function residence(addr, size) provides +// a convenient way to report the residence of an arbitrary memory region. +// This is a wrapper for the ::mincore() function. The ::mincore() function has +// the constraint of requiring the base address to be page aligned. +class MInCore { + public: + MInCore() {} + // For a region of memory return the number of bytes that are + // actually resident in memory. Note that the address and size + // do not need to be a multiple of the system page size. + static size_t residence(void* addr, size_t size); + + private: + // Separate out the implementation to make the code easier to test. + static size_t residence_impl(void* addr, size_t size, + MInCoreInterface* mincore); + + // Size of the array used to gather results from mincore(). + static constexpr int kArrayLength = 4096; + // Friends required for testing + friend class MInCoreTest; +}; + +} // End namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_MINCORE_H_ diff --git a/tcmalloc/internal/mincore_test.cc b/tcmalloc/internal/mincore_test.cc new file mode 100644 index 000000000..78fbf2451 --- /dev/null +++ b/tcmalloc/internal/mincore_test.cc @@ -0,0 +1,191 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/mincore.h" + +#include +#include + +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +using ::testing::Eq; + +// Mock interface to mincore() which has reports residence based on +// an array provided at construction. +class MInCoreMock : public ::tcmalloc::MInCoreInterface { + public: + MInCoreMock() : mapped_() {} + ~MInCoreMock() override {} + + // Implementation of minCore that reports presence based on provided array. + int mincore(void* addr, size_t length, unsigned char* result) override { + const size_t kPageSize = getpagesize(); + uintptr_t uAddress = reinterpret_cast(addr); + // Check that we only pass page aligned addresses into mincore(). + EXPECT_THAT(uAddress & (kPageSize - 1), Eq(0)); + + uintptr_t uEndAddress = uAddress + length; + int index = 0; + // Check for presence of the target pages in the map. + while (uAddress < uEndAddress) { + result[index] = (mapped_.find(uAddress) != mapped_.end() ? 1 : 0); + uAddress += kPageSize; + index++; + } + return 0; + } + + void addPage(uintptr_t uAddress) { mapped_.insert(uAddress); } + + private: + std::set mapped_; +}; + +// Friend class of MInCore which calls the mincore mock. 
+class MInCoreTest { + public: + MInCoreTest() : mcm_() {} + ~MInCoreTest() {} + + size_t residence(uintptr_t addr, size_t size) { + return MInCore::residence_impl(reinterpret_cast(addr), size, &mcm_); + } + + void addPage(uintptr_t page) { mcm_.addPage(page); } + + // Expose the internal size of array that we use to call mincore() so + // that we can be sure to need multiple calls to cover large memory regions. + const size_t chunkSize() { return tcmalloc::MInCore::kArrayLength; } + + private: + tcmalloc::MInCoreMock mcm_; +}; + +namespace { + +using ::testing::Eq; + +TEST(StaticVarsTest, TestResidence) { + tcmalloc::MInCoreTest mct; + const size_t kPageSize = getpagesize(); + + // Set up a pattern with a few resident pages. + // page 0 not mapped + mct.addPage(kPageSize); + // page 2 not mapped + mct.addPage(3 * kPageSize); + mct.addPage(4 * kPageSize); + + // An object of size zero should have a residence of zero. + EXPECT_THAT(mct.residence(320, 0), Eq(0)); + + // Check that an object entirely on the first page is + // reported as entirely unmapped. + EXPECT_THAT(mct.residence(320, 55), Eq(0)); + + // Check that an object entirely on the second page is + // reported as entirely mapped. + EXPECT_THAT(mct.residence(kPageSize + 320, 55), Eq(55)); + + // An object of size zero should have a residence of zero. + EXPECT_THAT(mct.residence(kPageSize + 320, 0), Eq(0)); + + // Check that an object over a mapped and unmapped page is half mapped. + EXPECT_THAT(mct.residence(kPageSize / 2, kPageSize), Eq(kPageSize / 2)); + + // Check that an object which spans two pages is reported as being mapped + // only on the page that's resident. + EXPECT_THAT(mct.residence(kPageSize / 2 * 3, kPageSize), Eq(kPageSize / 2)); + + // Check that an object that is on two mapped pages is reported as entirely + // resident. + EXPECT_THAT(mct.residence(kPageSize / 2 * 7, kPageSize), Eq(kPageSize)); + + // Check that an object that is on one mapped page is reported as only + // resident on the mapped page. + EXPECT_THAT(mct.residence(kPageSize * 2, kPageSize + 1), Eq(1)); + + // Check that an object that is on one mapped page is reported as only + // resident on the mapped page. + EXPECT_THAT(mct.residence(kPageSize + 1, kPageSize + 1), Eq(kPageSize - 1)); + + // Check that an object which spans beyond the mapped pages is reported + // as unmapped + EXPECT_THAT(mct.residence(kPageSize * 6, kPageSize), Eq(0)); + + // Check an object that spans three pages, two of them mapped. + EXPECT_THAT(mct.residence(kPageSize / 2 * 7 + 1, kPageSize * 2), + Eq(kPageSize * 3 / 2 - 1)); +} + +// Test whether we are correctly handling multiple calls to mincore. +TEST(StaticVarsTest, TestLargeResidence) { + tcmalloc::MInCoreTest mct; + uintptr_t uAddress = 0; + const size_t kPageSize = getpagesize(); + // Set up a pattern covering 6 * page size * MInCore::kArrayLength to + // allow us to test for situations where the region we're checking + // requires multiple calls to mincore(). + // Use a mapped/unmapped/unmapped pattern, this will mean that + // the regions examined by mincore() do not have regular alignment + // with the pattern. 
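+ // For example, with 4 KiB pages each 12 KiB block has only its first page
+ // resident, so a 20 KiB region starting at address 0 overlaps two such
+ // blocks and should report 2 * 4 KiB = 8 KiB resident.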
+ for (int i = 0; i < 2 * mct.chunkSize(); i++) { + mct.addPage(uAddress); + uAddress += 3 * kPageSize; + } + + uintptr_t baseAddress = 0; + for (int size = kPageSize; size < 32 * 1024 * 1024; size += 2 * kPageSize) { + uintptr_t unit = kPageSize * 3; + EXPECT_THAT(mct.residence(baseAddress, size), + Eq(kPageSize * ((size + unit - 1) / unit))); + } +} + +TEST(StaticVarsTest, UnmappedMemory) { + const size_t kPageSize = getpagesize(); + const int kNumPages = 16; + + // Overallocate kNumPages of memory, so we can munmap the page before and + // after it. + void* p = mmap(nullptr, (kNumPages + 2) * kPageSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ASSERT_NE(p, MAP_FAILED) << errno; + ASSERT_EQ(munmap(p, kPageSize), 0); + void* q = reinterpret_cast(p) + kPageSize; + void* last = reinterpret_cast(p) + (kNumPages + 1) * kPageSize; + ASSERT_EQ(munmap(last, kPageSize), 0); + + memset(q, 0, kNumPages * kPageSize); + ::benchmark::DoNotOptimize(q); + + for (int i = 0; i <= kNumPages; i++) { + EXPECT_EQ(i * kPageSize, MInCore::residence(q, i * kPageSize)); + } + + ASSERT_EQ(munmap(q, kNumPages * kPageSize), 0); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/internal/parameter_accessors.h b/tcmalloc/internal/parameter_accessors.h new file mode 100644 index 000000000..d19445178 --- /dev/null +++ b/tcmalloc/internal/parameter_accessors.h @@ -0,0 +1,46 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
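+// These accessors are declared weak so that callers can test at run time
+// whether the corresponding implementation was linked in before calling it,
+// e.g. (as logging.cc does above):
+//
+//   if (TCMalloc_Internal_SetProfileSamplingRate != nullptr) {
+//     TCMalloc_Internal_SetProfileSamplingRate(0);
+//   }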
+ +#ifndef TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ +#define TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ + +#include "absl/base/attributes.h" +#include "absl/types/optional.h" + +extern "C" { + +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetDetectUseAfterFree(); +ABSL_ATTRIBUTE_WEAK uint64_t TCMalloc_Internal_GetHeapSizeHardLimit(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetHPAASubrelease(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK double +TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetPerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK size_t TCMalloc_Internal_GetStats(char* buffer, + size_t buffer_length); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetDetectUseAfterFree(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHPAASubrelease(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction( + double v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReleaseRate(double v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetProfileSamplingRate(int64_t v); +} + +#endif // TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ diff --git a/tcmalloc/internal/percpu.cc b/tcmalloc/internal/percpu.cc new file mode 100644 index 000000000..471fbe9d6 --- /dev/null +++ b/tcmalloc/internal/percpu.cc @@ -0,0 +1,283 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/internal/percpu.h" + +#include +#include +#include +#include +#include +#include + +#include + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" // IWYU pragma: keep +#include "absl/base/internal/sysinfo.h" +#include "tcmalloc/internal/linux_syscall_support.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +namespace tcmalloc { +namespace subtle { +namespace percpu { + +// ---------------------------------------------------------------------------- +// Internal structures +// ---------------------------------------------------------------------------- + +// Restartable Sequence (RSEQ) + +extern "C" { + // We provide a per-thread value (defined in percpu_.c) which both tracks + // thread-local initialization state and (with RSEQ) provides an atomic + // in-memory reference for this thread's execution CPU. 
This value is only + // valid when the thread is currently executing + // Possible values: + // Unavailable/uninitialized: + // { kCpuIdUnsupported, kCpuIdUninitialized } + // Initialized, available: + // [0, NumCpus()) (Always updated at context-switch) +ABSL_PER_THREAD_TLS_KEYWORD ABSL_ATTRIBUTE_WEAK volatile kernel_rseq + __rseq_abi = { + 0, + static_cast(kCpuIdUninitialized), + 0, + 0, +}; + +ABSL_PER_THREAD_TLS_KEYWORD ABSL_ATTRIBUTE_WEAK volatile uint32_t __rseq_refcount; + +#ifdef __ppc__ +// On PPC, we have two cases for accessing the __rseq_abi TLS variable: +// * For initial-exec TLS, we write the raw assembly for accessing the memory +// with the appropriate relocations and offsets. On optimized builds, this is +// the use case that matters. +// * For non-initial-exec TLS, access is far more involved. We call this helper +// function from percpu_rseq_ppc.S to leave the initialization and access to +// the compiler. +ABSL_ATTRIBUTE_UNUSED ABSL_ATTRIBUTE_NOINLINE void *tcmalloc_tls_fetch_pic() { + return const_cast(&__rseq_abi); +} +#endif + +} // extern "C" + +enum PerCpuInitStatus { + kFastMode, + kSlowMode, +}; + +ABSL_CONST_INIT static PerCpuInitStatus init_status = kSlowMode; +ABSL_CONST_INIT static absl::once_flag init_per_cpu_once; + +static bool InitThreadPerCpu() { + if (__rseq_refcount++ > 0) { + return true; + } + + auto ret = syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), 0, + PERCPU_RSEQ_SIGNATURE); + if (ret == 0) { + return true; + } else { + __rseq_refcount--; + } + + return false; +} + +static void InitPerCpu() { + // Based on the results of successfully initializing the first thread, mark + // init_status to initialize all subsequent threads. + if (InitThreadPerCpu()) { + init_status = kFastMode; + } +} + +// Tries to initialize RSEQ both at the process-wide (init_status) and +// thread-level (cpu-id) level. If process-wide initialization has already been +// completed then only the thread-level will be completed. A return of false +// indicates that initialization failed and RSEQ is unavailable. +bool InitFastPerCpu() { + absl::base_internal::LowLevelCallOnce(&init_per_cpu_once, InitPerCpu); + + // Once we've decided fast-cpu support is available, initialization for all + // subsequent threads must succeed for consistency. + if (init_status == kFastMode && RseqCpuId() == kCpuIdUninitialized) { + CHECK_CONDITION(InitThreadPerCpu()); + } + + // If we've decided to use slow mode, set the thread-local CPU ID to + // __rseq_abi.cpu_id so that IsFast doesn't call this function again for + // this thread. + if (init_status == kSlowMode) { + __rseq_abi.cpu_id = kCpuIdUnsupported; + } + + return init_status == kFastMode; +} + +// ---------------------------------------------------------------------------- +// Implementation of unaccelerated (no RSEQ) per-cpu operations +// ---------------------------------------------------------------------------- + +static bool SetAffinityOneCpu(int cpu) { + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(cpu, &set); + if (0 == sched_setaffinity(0, sizeof(cpu_set_t), &set)) { + return true; + } + CHECK_CONDITION(errno == EINVAL); + return false; +} + +// We're being asked to fence against the mask , but a NULL mask +// means every CPU. Do we need ? +static bool NeedCpu(int cpu, const cpu_set_t *cpus) { + if (cpus == nullptr) return true; + return CPU_ISSET(cpu, cpus); +} + +static void SlowFence(const cpu_set_t *cpus) { + // Necessary, so the point in time mentioned below has visibility + // of our writes. 
+ std::atomic_thread_fence(std::memory_order_seq_cst); + + // First, save our cpumask (the user may want it back.) + cpu_set_t old; + CPU_ZERO(&old); + CHECK_CONDITION(0 == sched_getaffinity(0, sizeof(cpu_set_t), &old)); + + // Here's the basic idea: if we run on every CPU, then every thread + // that runs after us has certainly seen every store we've made up + // to this point, so we pin ourselves to each CPU in turn. + // + // But we can't run everywhere; our control plane may have set cpuset.cpus to + // some subset of CPUs (and may be changing it as we speak.) On the plus + // side, if we are unable to run on a particular CPU, the same is true for our + // siblings (up to some races, dealt with below), so we don't need to. + + for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { + if (!NeedCpu(cpu, cpus)) { + // unnecessary -- user doesn't care about synchronization on this cpu + continue; + } + // If we can't pin ourselves there, then no one else can run there, so + // that's fine. + while (SetAffinityOneCpu(cpu)) { + // But even if the pin succeeds, we might not end up running there; + // between the pin trying to migrate and running on , a change + // to cpuset.cpus may cause us to migrate somewhere else instead. + // So make sure we actually got where we wanted. + if (cpu == sched_getcpu()) { + break; + } + } + } + // Overly detailed explanation of kernel operations follows. + // + // OK, at this point, for each cpu i, there are two possibilities: + // * we've run on i (so we interrupted any sibling & writes are visible) + // * At some point in time T1, we read a value of cpuset.cpus disallowing i. + // + // Linux kernel details: all writes and reads to cpuset.cpus are + // serialized on a mutex (called callback_mutex). Because of the + // memory barrier above, our writes certainly happened-before T1. + // + // Moreover, whoever wrote cpuset.cpus to ban i looped over our + // threads in kernel, migrating all threads away from i and setting + // their masks to disallow i. So once that loop is known to be + // over, any thread that was running on i has been interrupted at + // least once, and migrated away. It is possible a second + // subsequent change to cpuset.cpus (at time T2) re-allowed i, but + // serialization of cpuset.cpus changes guarantee that our writes + // are visible at T2, and since migration is a barrier, any sibling + // migrated after T2 to cpu i will also see our writes. + // + // So we just have to make sure the update loop from whoever wrote + // cpuset.cpus at T1 is completed. That loop executes under a + // second mutex (cgroup_mutex.) So if we take that mutex ourselves, + // we can be sure that update loop at T1 is done. So read + // /proc/self/cpuset. We don't care what it says; as long as it takes the lock + // in question. This guarantees that every thread is either running on a cpu + // we visited, or received a cpuset.cpus rewrite that happened strictly after + // our writes. + + using tcmalloc::tcmalloc_internal::signal_safe_close; + using tcmalloc::tcmalloc_internal::signal_safe_open; + using tcmalloc::tcmalloc_internal::signal_safe_read; + int fd = signal_safe_open("/proc/self/cpuset", O_RDONLY); + CHECK_CONDITION(fd >= 0); + + char c; + CHECK_CONDITION(1 == signal_safe_read(fd, &c, 1, nullptr)); + + CHECK_CONDITION(0 == signal_safe_close(fd)); + + // Try to go back to what we originally had before Fence. 
+ if (0 != sched_setaffinity(0, sizeof(cpu_set_t), &old)) { + CHECK_CONDITION(EINVAL == errno); + // The original set is no longer valid, which should only happen if + // cpuset.cpus was changed at some point in Fence. If that happened and we + // didn't fence, our control plane would have rewritten our affinity mask to + // everything in cpuset.cpus, so do that. + cpu_set_t set; + CPU_ZERO(&set); + for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) { + CPU_SET(i, &set); + } + CHECK_CONDITION(0 == sched_setaffinity(0, sizeof(cpu_set_t), &set)); + } +} + +// Interrupt every concurrently running sibling thread on any cpu in +// "cpus", and guarantee our writes up til now are visible to every +// other CPU. (cpus == NULL is equivalent to all CPUs.) +static void FenceInterruptCPUs(const cpu_set_t *cpus) { + CHECK_CONDITION(IsFast()); + + SlowFence(cpus); +} + +void Fence() { + CompilerBarrier(); + + // Other operations (or all in RSEQ mode) might just be running on another + // CPU. Do something about that: use RSEQ::Fence() to just send interrupts + // and restart any such operation. + FenceInterruptCPUs(nullptr); +} + +void FenceCpu(int cpu) { + // Prevent compiler re-ordering of code below. In particular, the call to + // GetCurrentCpu must not appear in assembly program order until after any + // code that comes before FenceCpu in C++ program order. + CompilerBarrier(); + + // A useful fast path: nothing needs doing at all to order us with respect + // to our own CPU. + if (GetCurrentCpu() == cpu) { + return; + } + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(cpu, &set); + FenceInterruptCPUs(&set); +} + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc diff --git a/tcmalloc/internal/percpu.h b/tcmalloc/internal/percpu.h new file mode 100644 index 000000000..5869d3614 --- /dev/null +++ b/tcmalloc/internal/percpu.h @@ -0,0 +1,248 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_PERCPU_H_ +#define TCMALLOC_INTERNAL_PERCPU_H_ + +#define PERCPU_TCMALLOC_FIXED_SLAB_SHIFT 18 + +// PERCPU_RSEQ_SUPPORTED_PLATFORM defines whether or not we have an +// implementation for the target OS and architecture. +#if defined(__linux__) && (defined(__x86_64__) || defined(__PPC64__)) +#define PERCPU_RSEQ_SUPPORTED_PLATFORM 1 +#else +#define PERCPU_RSEQ_SUPPORTED_PLATFORM 0 +#endif + +#define PERCPU_RSEQ_VERSION 0x0 +#define PERCPU_RSEQ_FLAGS 0x0 +#if defined(__x86_64__) +#define PERCPU_RSEQ_SIGNATURE 0x53053053 +#elif defined(__ppc__) +#define PERCPU_RSEQ_SIGNATURE 0x0FE5000B +#else +// Rather than error, allow us to build, but with an invalid signature. +#define PERCPU_RSEQ_SIGNATURE 0x0 +#endif + +// The constants above this line must be macros since they are shared with the +// RSEQ assembly sources. 
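+// Sketch of the intended use of FenceCpu(), declared below (illustrative
+// caller; not taken from this change):
+//
+//   // ... publish updates that threads on `cpu` must observe ...
+//   tcmalloc::subtle::percpu::FenceCpu(cpu);
+//   // Any restartable sequence that was running on `cpu` has been
+//   // interrupted, so it cannot commit against the stale data.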
+#ifndef __ASSEMBLER__ + +#ifdef __linux__ +#include +#endif + +#include +#include +#include + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/atomic_danger.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/linux_syscall_support.h" +#include "tcmalloc/internal/logging.h" + +// PERCPU_USE_RSEQ defines whether TCMalloc support for RSEQ on the target +// architecture exists. We currently only provide RSEQ for 64-bit x86 and PPC +// binaries. +#if !defined(PERCPU_USE_RSEQ) +#if (ABSL_PER_THREAD_TLS == 1) && (PERCPU_RSEQ_SUPPORTED_PLATFORM == 1) +#define PERCPU_USE_RSEQ 1 +#else +#define PERCPU_USE_RSEQ 0 +#endif +#endif // !defined(PERCPU_USE_RSEQ) + +namespace tcmalloc { +namespace subtle { +namespace percpu { + +// Internal state used for tracking initialization of RseqCpuId() +static const int kCpuIdUnsupported = -2; +static const int kCpuIdUninitialized = -1; +static const int kCpuIdInitialized = 0; + +#if PERCPU_USE_RSEQ +extern "C" ABSL_PER_THREAD_TLS_KEYWORD volatile kernel_rseq __rseq_abi; + +static inline int RseqCpuId() { return __rseq_abi.cpu_id; } +#else // !PERCPU_USE_RSEQ +#define ATTR_RSEQ +static inline int RseqCpuId() { return kCpuIdUnsupported; } +#endif + +typedef int (*OverflowHandler)(int cpu, size_t cl, void *item); +typedef void *(*UnderflowHandler)(int cpu, size_t cl); + +// Functions below are implemented in the architecture-specific percpu_rseq_*.S +// files. +extern "C" { +int TcmallocSlab_PerCpuCmpxchg64(int target_cpu, intptr_t *p, intptr_t old_val, + intptr_t new_val); +int TcmallocSlab_Push(void *ptr, size_t cl, void *item, size_t shift, + OverflowHandler f); +int TcmallocSlab_Push_FixedShift(void *ptr, size_t cl, void *item, + OverflowHandler f); +void *TcmallocSlab_Pop(void *ptr, size_t cl, UnderflowHandler f, size_t shift); +void *TcmallocSlab_Pop_FixedShift(void *ptr, size_t cl, UnderflowHandler f); + +// Push a batch for a slab which the Shift equal to +// PERCPU_TCMALLOC_FIXED_SLAB_SHIFT +size_t TcmallocSlab_PushBatch_FixedShift(void *ptr, size_t cl, void **batch, + size_t len); +// Pop a batch for a slab which the Shift equal to +// PERCPU_TCMALLOC_FIXED_SLAB_SHIFT +size_t TcmallocSlab_PopBatch_FixedShift(void *ptr, size_t cl, void **batch, + size_t len); +} +// NOTE: We skirt the usual naming convention slightly above using "_" to +// increase the visibility of functions embedded into the root-namespace (by +// virtue of C linkage) in the supported case. + +inline int GetCurrentCpuUnsafe() { +// On PowerPC, Linux maintains the current CPU in the bottom 12 bits of special +// purpose register SPRG3, which is readable from user mode. References: +// +// This is intended for VDSO syscalls, but is much faster if we simply inline it +// here, presumably due to the function call and null-check overheads of the +// VDSO version. As of 2014-07 the CPU time costs are something like 1.2 ns for +// the inline version vs 12 ns for VDSO. +#if defined(__PPC64__) && defined(__linux__) + uint64_t spr; + + // Mark the asm as volatile, so that it is not hoisted out of loops. + asm volatile("mfspr %0, 0x103;" : "=r"(spr)); + + return spr & 0xfff; +#else + // Elsewhere, use the rseq mechanism. + return RseqCpuId(); +#endif +} + +inline int GetCurrentCpu() { + // We can't use the unsafe version unless we have the appropriate version of + // the rseq extension. 
This also allows us a convenient escape hatch if the + // kernel changes the way it uses special-purpose registers for CPU IDs. + int cpu = GetCurrentCpuUnsafe(); + + // We open-code the check for fast-cpu availability since we do not want to + // force initialization in the first-call case. This so done so that we can + // use this in places where it may not always be safe to initialize and so + // that it may serve in the future as a proxy for callers such as + // CPULogicalId() without introducing an implicit dependence on the fast-path + // extensions. Initialization is also simply unneeded on some platforms. + if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { + return cpu; + } + +#ifdef TCMALLOC_HAVE_SCHED_GETCPU + cpu = sched_getcpu(); + ASSERT(cpu >= 0); +#endif // TCMALLOC_HAVE_SCHED_GETCPU + + return cpu; +} + +bool InitFastPerCpu(); + +inline bool IsFast() { + if (!PERCPU_USE_RSEQ) { + return false; + } + + int cpu = RseqCpuId(); + + if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { + return true; + } else if (ABSL_PREDICT_FALSE(cpu == kCpuIdUnsupported)) { + return false; + } else { + // Sets 'cpu' for next time, and calls EnsureSlowModeInitialized if + // necessary. + return InitFastPerCpu(); + } +} + +// As IsFast(), but if this thread isn't already initialized, will not +// attempt to do so. +inline bool IsFastNoInit() { + if (!PERCPU_USE_RSEQ) { + return false; + } + int cpu = RseqCpuId(); + return ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized); +} + +// A barrier that prevents compiler reordering. +inline void CompilerBarrier() { +#if defined(__GNUC__) + __asm__ __volatile__("" : : : "memory"); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); +#endif +} + +// Internal tsan annotations, do not use externally. +// Required as tsan does not natively understand RSEQ. +#ifdef THREAD_SANITIZER +extern "C" { +void __tsan_acquire(void *addr); +void __tsan_release(void *addr); +} +#endif + +// TSAN relies on seeing (and rewriting) memory accesses. It can't +// get at the memory acccesses we make from RSEQ assembler sequences, +// which means it doesn't know about the semantics our sequences +// enforce. So if we're under TSAN, add barrier annotations. +inline void TSANAcquire(void *p) { +#ifdef THREAD_SANITIZER + __tsan_acquire(p); +#endif +} + +inline void TSANRelease(void *p) { +#ifdef THREAD_SANITIZER + __tsan_release(p); +#endif +} + +inline void TSANMemoryBarrierOn(void *p) { + TSANAcquire(p); + TSANRelease(p); +} + +// These methods may *only* be called if IsFast() has been called by the current +// thread (and it returned true). +inline int CompareAndSwapUnsafe(int target_cpu, std::atomic *p, + intptr_t old_val, intptr_t new_val) { + TSANMemoryBarrierOn(p); + return TcmallocSlab_PerCpuCmpxchg64( + target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p), old_val, + new_val); +} + +void FenceCpu(int cpu); + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc + +#endif // !__ASSEMBLER__ +#endif // TCMALLOC_INTERNAL_PERCPU_H_ diff --git a/tcmalloc/internal/percpu_rseq_asm.S b/tcmalloc/internal/percpu_rseq_asm.S new file mode 100644 index 000000000..2de53725d --- /dev/null +++ b/tcmalloc/internal/percpu_rseq_asm.S @@ -0,0 +1,39 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Single file to include target specific implementations for percpu. + +#include "tcmalloc/internal/percpu.h" + +#if PERCPU_RSEQ_SUPPORTED_PLATFORM +#if defined(__x86_64__) +#include "tcmalloc/internal/percpu_rseq_x86_64.S" +#elif defined(__ppc__) +#include "tcmalloc/internal/percpu_rseq_ppc.S" +#else +#error "RSEQ support expected, but not found." +#endif +#endif // PERCPU_RSEQ_SUPPORTED_PLATFORM + +// We do not need an executable stack. Put this outside the +// architecture-specific region above in order to suppress "missing +// .note.GNU-stack section implies executable stack" errors. +// +// Cf. http://en.chys.info/2010/12/note-gnu-stack/ +#if defined(__arm__) || defined(__PPC64__) +.section .note.GNU-stack, "", %progbits +#else +.section .note.GNU-stack, "", @progbits +#endif // __arm__ || __PPC64__ + diff --git a/tcmalloc/internal/percpu_rseq_ppc.S b/tcmalloc/internal/percpu_rseq_ppc.S new file mode 100644 index 000000000..62be2720d --- /dev/null +++ b/tcmalloc/internal/percpu_rseq_ppc.S @@ -0,0 +1,602 @@ +/* + * Copyright 2019 The TCMalloc Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Rseq critical section functions and restart handlers. +// +// They must also avoid writing the nonvolatile and reserved general purpose +// registers defined by the Power Architecture 64-Bit ELF V2 ABI +// +// * r1-r2 +// * r13 +// * r14-r31 +// +// Finally, note that the restart handler reserves the right to clobber +// condition registers. This means that critical section functions must not +// explicitly or implicitly read condition registers outside of their +// [start, limit) critical regions. + +#ifndef __ppc__ +#error "percpu_rseq_ppc.S should only be included for PPC builds" +#endif + +#include "tcmalloc/internal/percpu.h" + +// Use the ELFv2 ABI. +.abiversion 2 +.section google_malloc, "ax" + +//////////////////////////////////////////////////////////////////////// +// Macros +//////////////////////////////////////////////////////////////////////// + +/* + * Provide a directive to specify the size of symbol "label", relative to the + * current location and its start. + */ +#define ENCODE_SIZE(label) .size label, . - label; + +// Place the CPU number into the bottom 12 bits of dst. The upper 52 bits are +// unspecified. +// +// See GetCurrentCpu() for notes on the implementation. +#define GET_CPU_UNMASKED(dst) \ + mfspr dst, 259 + +// Given an unmasked CPU number, put the interesting parts into dst. +#define MASK_CPU(dst, src) \ + clrldi dst, src, 52 + +// Like GET_CPU_UNMASKED, but guarantees that the upper bits are cleared. 
May +// be slower than the unmasked version. +#define GET_CPU(dst) \ + GET_CPU_UNMASKED(dst); \ + MASK_CPU(dst, dst) + +// Given an unmasked CPU number, calculate the offset of that CPU's word from +// CPU 0's into dst. +#define CALCULATE_PER_CPU_WORD_OFFSET(dst, unmasked_cpu) \ + clrlsldi dst, unmasked_cpu, 52, PERCPU_BYTES_PER_REGION_SHIFT + +// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP +// must match PERCPU_RSEQ_SIGNATURE (as configured by our rseq syscall's +// signature parameter). This signature is used to annotate valid abort IPs +// (since rseq_cs could live in a user-writable segment). +#define SIGN_ABORT() \ + .long PERCPU_RSEQ_SIGNATURE; + +// DEFINE_UPSTREAM_CS triggers the generation of rseq_cs table (the triple of +// start, commit, abort IPs) and a trampoline function. +// +// Upstream API Exposition: +// +// START_RSEQ() // vvvvv emits a bunch of things +// global entry point: +// TOC setup +// METHOD_critical_abort: +// local entry point: +// store rseq_cs to __rseq_abi.rseq_cs, starting restartable sequence +// METHOD_start: // Emitted as part of START_RSEQ() +// // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +// GET_CPU...() // Reads current CPU +// ... +// single store // Commits sequence +// METHOD_critical_limit: +// ...return... +// +// START_RSEQ does several things: +// * We need to set up the TOC pointer for global entry points. +// * When restarting, we return to the local entry point, since the TOC pointer +// is left intact from the restart. METHOD_critical_abort and local entry +// point are therefore the same address. +// * It stores to the TLS to register that we're in a restartable sequence with +// the kernel. +// +// This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a +// (rodata) constant table, whose address is used to start the critical +// section, and the abort trampoline. +// +// The trampoline is used because: +// 1. Restarts are expected to be rare, so the extra jump when restarting is +// expected to be infrequent. +// 2. The upstream restartable sequence implementation expects the trailing 4 +// bytes of the abort PC to be "signed" (to prevent manipulation of the PC +// to an arbitrary choice). For us, this is PERCPU_RSEQ_SIGNATURE. This +// value is passed to the kernel during configuration of the rseq syscall. +// This would either need to be encoded as a nop* at the start of every +// restartable sequence, increasing instruction cache pressure, or placed +// directly before the entry point. +// +// * The upstream rseq protocol appears to be converging on using a trap +// instruction (twui), so we cannot allow it to appear anywhere in our +// actual executed path. +// +// Upon restart, the (upstream) kernel API clears the per-thread restartable +// sequence state. We return to METHOD_abort (rather than METHOD_start), as we +// need to reinitialize this value. + +// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for +// relocations, but could be read-only for non-PIE builds. 
+#define DEFINE_UPSTREAM_CS(label) \ + .pushsection __rseq_cs, "aw"; \ + .balign 32; \ + .protected __rseq_cs_##label; \ + .type __rseq_cs_##label,@object; \ + .size __rseq_cs_##label,32; \ + __rseq_cs_##label: \ + .long PERCPU_RSEQ_VERSION, PERCPU_RSEQ_FLAGS; \ + .quad .L##label##_critical_start; \ + .quad .L##label##_critical_limit - .L##label##_critical_start; \ + .quad label##_trampoline; \ + .reloc 0, R_PPC64_NONE, .L##label##array; \ + .popsection; \ + .pushsection __rseq_cs_ptr_array, "aw"; \ + .L##label##array: \ + .quad __rseq_cs_##label; \ + .popsection; \ + .pushsection rseq_trampoline, "ax"; \ + SIGN_ABORT(); \ + .globl label##_trampoline; \ + .type label##_trampoline, @function; \ +label##_trampoline: \ + .cfi_startproc; \ + b .L##label##_critical_abort; \ + .cfi_endproc; \ + .size label##_trampoline, . - label##_trampoline; \ + .popsection + +// With PIE: We have initial-exec TLS, even in the presence of position +// independent code. +#if !defined(__PIC__) || defined(__PIE__) + +#define START_RSEQ(label) \ + .L##label##_gep0: \ + addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \ + addi %r2, %r2, .TOC.-.L##label##_gep0@l; \ + .L##label##_critical_abort: \ + .L##label##_lep0: \ + .localentry label,.-label; \ + addis %r9, %r2, __rseq_cs_##label@toc@ha; \ + addi %r9, %r9, __rseq_cs_##label@toc@l; \ + addis %r10, %r13, __rseq_abi@tprel@ha; \ + addi %r10, %r10, __rseq_abi@tprel@l; \ + std %r9, 8(%r10); \ + .L##label##_critical_start: + +#else /* !defined(__PIC__) || defined(__PIE__) */ + +// Handle non-initial exec TLS. When performance matters, we should be using +// initial-exec TLS. +// +// We need to caller-save r3-r8, as they are our arguments to the actual +// restartable sequence code. + +#define START_RSEQ(label) \ + .L##label##_gep0: \ + addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \ + addi %r2, %r2, .TOC.-.L##label##_gep0@l; \ + .L##label##_critical_abort: \ + .L##label##_lep0: \ + .localentry label,.-label; \ + mflr 0; \ + std %r0, 0x10(1); \ + std %r3, -0x10(1); \ + std %r4, -0x18(1); \ + std %r5, -0x20(1); \ + std %r6, -0x28(1); \ + std %r7, -0x30(1); \ + std %r8, -0x38(1); \ + stdu %r1, -0x200(1); \ + bl tcmalloc_tls_fetch_pic; \ + nop; \ + mr %r10, %r3; \ + addi %r1, %r1, 0x200; \ + ld %r8, -0x38(1); \ + ld %r7, -0x30(1); \ + ld %r6, -0x28(1); \ + ld %r5, -0x20(1); \ + ld %r4, -0x18(1); \ + ld %r3, -0x10(1); \ + ld %r0, 0x10(1); \ + mtlr 0; \ + addis %r9, %r2, __rseq_cs_##label@toc@ha; \ + addi %r9, %r9, __rseq_cs_##label@toc@l; \ + std %r9, 8(%r10); \ + .L##label##_critical_start: + +#endif + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_PerCpuCmpxchg64 +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_PerCpuCmpxchg64 +.type TcmallocSlab_PerCpuCmpxchg64, @function +TcmallocSlab_PerCpuCmpxchg64: +.LTcmallocSlab_PerCpuCmpxchg64_entry: + .cfi_startproc + // Register use: + // + // * r3: (Argument: int64) target_cpu + // * r4: (Argument: intptr_t*) p + // * r5: (Argument: intptr_t) old_val + // * r6: (Argument: intptr_t) new_val + // * r7: The current CPU number. + // * r8: The current value of *p. + // + + START_RSEQ(TcmallocSlab_PerCpuCmpxchg64) + + // Are we running on the target CPU? + GET_CPU(%r7) + cmpd %r7, %r3 + bne .LCAS_wrong_cpu + + // Load the current value of *p. + ld %r8, 0(%r4) + + // Is the value up to date? + cmpd %r8, %r5 + bne .LCAS_wrong_value + + // Store the new value, committing the operation. 
+ std %r6, 0(%r4) +.LTcmallocSlab_PerCpuCmpxchg64_critical_limit: + + // Return the target CPU, which is already in r3. + blr + +.LCAS_wrong_cpu: + // Return the current CPU. + mr %r3, %r7 + blr + +.LCAS_wrong_value: + // Return -1. + li %r3, -1 + blr + +.LTcmallocSlab_PerCpuCmpxchg64_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_PerCpuCmpxchg64); +DEFINE_UPSTREAM_CS(TcmallocSlab_PerCpuCmpxchg64); + + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Push +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Push +.type TcmallocSlab_Push, @function +TcmallocSlab_Push: +.LTcmallocSlab_Push_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) p + // * r6: (Argument: size_t) shift + // * r7: (Argument: uintptr_t) f + // Return value: current CPU + // Available r8 r9 r10 r11 r12 + // Note that r12 may be overwritten in rseq_restart_address_internal so + // cannot be relied upon across restartable sequence boundaries. + + START_RSEQ(TcmallocSlab_Push) + + GET_CPU(%r8) // r8 = current CPU, includes MASK operation + sld %r9, %r8, %r6 // r9 = r8 << shift (r6) + add %r9, %r3, %r9 // r9 = start of this CPU region + rldicr %r10, %r4, 3, 60 // r10 = header offset for class size cl (r4) + add %r10, %r9, %r10 // r10 = slab header addr (class offset + CPU base) + lhz %r12, 0(%r10) // r12 = current index + lhz %r11, 6(%r10) // r11 = length + cmpld %cr7, %r11, %r12 // compare current index with length + ble %cr7, .LTcmallocSlab_Push_no_capacity + rldicr %r11, %r12, 3, 60 // r11 = offset of current index + addi %r12, %r12, 1 // current index += 1 + stdx %r5, %r9, %r11 // store pointer p (r5) into current offset + sth %r12, 0(%r10) // update current index + +.LTcmallocSlab_Push_critical_limit: + mr %r3, %r8 // Return current CPU in r3 + blr + +.LTcmallocSlab_Push_no_capacity: + mr %r3, %r8 // Place current CPU in r3 + // r7 already contains target function + b .LPushOverflowTrampoline + +.LTcmallocSlab_Push_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Push); +DEFINE_UPSTREAM_CS(TcmallocSlab_Push); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Push_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Push_FixedShift +.type TcmallocSlab_Push_FixedShift, @function +TcmallocSlab_Push_FixedShift: +.LTcmallocSlab_Push_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) p + // * r6: (Argument: uintptr_t) f + + START_RSEQ(TcmallocSlab_Push_FixedShift) + + GET_CPU_UNMASKED(%r7) // r7 = unmasked CPU + // Mask upper 52 bits of %r7 and shift left in single + // operation. Removes the need to have a separate + // MASK operation on the critical path. 
+ clrlsldi %r8, %r7, 52, PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + add %r8, %r3, %r8 // r8 = start of this CPU region + rldicr %r9, %r4, 3, 60 // r9 = start of header + add %r9, %r8, %r9 // r9 = slab header addr + lhz %r10, 0(%r9) // r10 = current index + lhz %r11, 6(%r9) // r11 = end index + cmpld %cr7, %r11, %r10 // Check for space + ble %cr7, .LTcmallocSlab_Push_FixedShift_no_capacity + rldicr %r11, %r10, 3, 60 // r11 = offset of current index + addi %r10, %r10, 1 // current index ++ + stdx %r5, %r8, %r11 // store the item (from r5) + sth %r10, 0(%r9) // store current index + +.LTcmallocSlab_Push_FixedShift_critical_limit: + MASK_CPU(%r3, %r7) // Return and mask CPU into %r3 + blr + +.LTcmallocSlab_Push_FixedShift_no_capacity: + MASK_CPU(%r3, %r7) // Move and mask CPU into %r3 + mr %r7, %r6 // Move target function into r7 + b .LPushOverflowTrampoline + +.LTcmallocSlab_Push_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Push_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Push_FixedShift); + + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Pop +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Pop +.type TcmallocSlab_Pop, @function +TcmallocSlab_Pop: +.LTcmallocSlab_Pop_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) f + // * r6: (Argument: size_t) shift + // Available r7 r8 r9 r10 r11 + // r12 can be used as a temporary within rseq + + START_RSEQ(TcmallocSlab_Pop) + + GET_CPU(%r7) // r7 = CPU, includes mask operation + sld %r12, %r7, %r6 // r12 = CPU shifted by shift (r6) + add %r12, %r3, %r12 // r12 = start of this CPU region + rldicr %r8, %r4, 3, 60 // r8 = offset to class size + add %r8, %r12, %r8 // r8 = slab header addr for class size + lhz %r9, 0(%r8) // r9 = current index + lhz %r10, 4(%r8) // r10 = begin + cmpld %cr7, %r10, %r9 // Check that we have items to pop + bge %cr7, .LTcmallocSlab_Pop_no_item + subi %r9, %r9, 1 // r9 = current index -- + rldicr %r10, %r9, 3, 60 // r10 = offset to current item + ldx %r11, %r12, %r10 // load the item from base + index + sth %r9, 0(%r8) // store current index + +.LTcmallocSlab_Pop_critical_limit: + // Move the item into r3, now that it's safe to do so. 
+ mr %r3, %r11 + blr + +.LTcmallocSlab_Pop_no_item: + mr %r3, %r7 // Place CPU into r3 + b .LPopUnderflowTrampoline + +.LTcmallocSlab_Pop_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Pop); +DEFINE_UPSTREAM_CS(TcmallocSlab_Pop); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Pop_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Pop_FixedShift +.type TcmallocSlab_Pop_FixedShift, @function +TcmallocSlab_Pop_FixedShift: +.LTcmallocSlab_Pop_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) f + + START_RSEQ(TcmallocSlab_Pop_FixedShift) + + GET_CPU_UNMASKED(%r6) // r6 = current CPU + // Following instruction combines mask and shift + clrlsldi %r7, %r6, 52, PERCPU_TCMALLOC_FIXED_SLAB_SHIFT // r7 = header offset + add %r7, %r3, %r7 // r7 = start of this CPU region + rldicr %r8, %r4, 3, 60 // r8 = offset of size class + add %r8, %r7, %r8 // r8 = slab header addr + lhz %r9, 0(%r8) // r9 = current index + lhz %r10, 4(%r8) // r10 = begin index + cmpld %cr7, %r10, %r9 // Check that there are elements available + bge %cr7, .LTcmallocSlab_Pop_FixedShift_no_item + subi %r9, %r9, 1 // current index -- + rldicr %r10, %r9, 3, 60 // r10 = offset of current index + ldx %r11, %r7, %r10 // r11 = load the item + sth %r9, 0(%r8) // update current index + +.LTcmallocSlab_Pop_FixedShift_critical_limit: + // Move the item into r3, now that it's safe to do so. + mr %r3, %r11 + blr + +.LTcmallocSlab_Pop_FixedShift_no_item: + MASK_CPU(%r3, %r6) // Extract CPU from unmasked value in %r6 + b .LPopUnderflowTrampoline + +.LTcmallocSlab_Pop_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Pop_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Pop_FixedShift); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_PushBatch_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_PushBatch_FixedShift +.type TcmallocSlab_PushBatch_FixedShift, @function +TcmallocSlab_PushBatch_FixedShift: +.LTcmallocSlab_PushBatch_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) batch + // * r6: (Argument: uintptr_t) len + + START_RSEQ(TcmallocSlab_PushBatch_FixedShift) + + GET_CPU_UNMASKED(%r7) + clrlsldi %r8, %r7, 52, PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + add %r8, %r3, %r8 // r8 - start of this CPU region + sldi %r9, %r4, 3 + add %r9, %r8, %r9 // r9 - slab header addr + lhz %r10, 0(%r9) // r10 - current + lhz %r11, 6(%r9) // r11 - end + sldi %r7, %r6, 3 // r7 - len * 8 + cmpld %cr7, %r11, %r10 // current < end? 
+ ble %cr7, .LTcmallocSlab_PushBatch_FixedShift_critical_limit + sub %r11, %r11, %r10 // r11 - available capacity + // r11 = min(r11, r6) + cmpld %cr7, %r6, %r11 + bge %cr7, .LTcmallocSlab_PushBatch_FixedShift_min + mr %r11, %r6 +.LTcmallocSlab_PushBatch_FixedShift_min: + add %r11, %r10, %r11 + sldi %r11, %r11, 3 + sldi %r10, %r10, 3 + + // At this point: + // r5 - batch, r7 - offset in the batch + // r8 - cpu region, r10 - offset into the cpu region, r11 - limit of offset +.LTcmallocSlab_PushBatch_FixedShift_loop: + subi %r7, %r7, 8 + ldx %r12, %r5, %r7 // load the item + stdx %r12, %r8, %r10 // store the item + addi %r10, %r10, 8 + cmpld %cr7, %r10, %r11 + bne %cr7, .LTcmallocSlab_PushBatch_FixedShift_loop + rotrdi %r10, %r10, 3 + sth %r10, 0(%r9) // update current + +.LTcmallocSlab_PushBatch_FixedShift_critical_limit: + // return r6 - r7 / 8 + rotrdi %r7, %r7, 3 + sub %r3, %r6, %r7 + blr + +.LTcmallocSlab_PushBatch_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_PushBatch_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_PushBatch_FixedShift); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_PopBatch_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_PopBatch_FixedShift +.type TcmallocSlab_PopBatch_FixedShift, @function +TcmallocSlab_PopBatch_FixedShift: +.LTcmallocSlab_PopBatch_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) batch + // * r6: (Argument: uintptr_t) len + + START_RSEQ(TcmallocSlab_PopBatch_FixedShift) + + GET_CPU_UNMASKED(%r7) + clrlsldi %r7, %r7, 52, PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + add %r7, %r3, %r7 // r7 - start of this CPU region + sldi %r8, %r4, 3 + add %r8, %r7, %r8 // r8 - slab header addr + lhz %r9, 0(%r8) // r9 - current + lhz %r10, 4(%r8) // r10 - begin + li %r11, 0 // current position in batch + cmpld %cr7, %r10, %r9 + bge %cr7, .LTcmallocSlab_PopBatch_FixedShift_critical_limit + sub %r10, %r9, %r10 // r10 - available items + // r10 = min(r10, r6) + cmpld %cr7, %r6, %r10 + bge %cr7, .LTcmallocSlab_PopBatch_FixedShift_min + mr %r10, %r6 +.LTcmallocSlab_PopBatch_FixedShift_min: + sub %r10, %r9, %r10 + sldi %r10, %r10, 3 + sldi %r9, %r9, 3 + + // At this point: + // r5 - batch, r11 - offset in the batch + // r7 - cpu region, r9 - offset into the cpu region, r10 - limit of offset +.LTcmallocSlab_PopBatch_FixedShift_loop: + subi %r9, %r9, 8 + ldx %r12, %r7, %r9 // load the item + stdx %r12, %r5, %r11 // store the item + addi %r11, %r11, 8 + cmpld %cr7, %r9, %r10 + bne %cr7, .LTcmallocSlab_PopBatch_FixedShift_loop + rotrdi %r9, %r9, 3 + sth %r9, 0(%r8) // update current + +.LTcmallocSlab_PopBatch_FixedShift_critical_limit: + rotrdi %r3, %r11, 3 + blr + +.LTcmallocSlab_PopBatch_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_PopBatch_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_PopBatch_FixedShift); + + // Input: r7 points to the function to tail call. r3...r6 are args for it. +.LPushOverflowTrampoline: + mtctr %r7 + mr %r12, %r7 // Callee expects r12 to point to its first instruction. + bctr + + // Input: r5 points to the function to tail call. r3...r4 are args for it. +.LPopUnderflowTrampoline: + mtctr %r5 + mr %r12, %r5 // Callee expects r12 to point to its first instruction. 
+ bctr + +.section .note.GNU-stack,"",%progbits + diff --git a/tcmalloc/internal/percpu_rseq_unsupported.cc b/tcmalloc/internal/percpu_rseq_unsupported.cc new file mode 100644 index 000000000..7be2802ce --- /dev/null +++ b/tcmalloc/internal/percpu_rseq_unsupported.cc @@ -0,0 +1,81 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Provides skeleton RSEQ functions which raise a hard error in the case of +// being erroneously called on an unsupported platform. + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" + +#if !PERCPU_RSEQ_SUPPORTED_PLATFORM + +namespace tcmalloc { +namespace subtle { +namespace percpu { + +static void Unsupported() { + Log(kCrash, __FILE__, __LINE__, + "RSEQ function called on unsupported platform."); +} + +int TcmallocSlab_PerCpuCmpxchg64(int target_cpu, intptr_t *p, intptr_t old_val, + intptr_t new_val) { + Unsupported(); + return -1; +} + +int TcmallocSlab_Push(void *ptr, size_t cl, void *item, size_t shift, + OverflowHandler f) { + Unsupported(); + return -1; +} + +int TcmallocSlab_Push_FixedShift(void *ptr, size_t cl, void *item, + OverflowHandler f) { + Unsupported(); + return -1; +} + +void *TcmallocSlab_Pop(void *ptr, size_t cl, UnderflowHandler f, size_t shift) { + Unsupported(); + return nullptr; +} + +void *TcmallocSlab_Pop_FixedShift(void *ptr, size_t cl, UnderflowHandler f) { + Unsupported(); + return nullptr; +} + +size_t TcmallocSlab_PushBatch_FixedShift(void *ptr, size_t cl, void **batch, + size_t len) { + Unsupported(); + return 0; +} + +size_t TcmallocSlab_PopBatch_FixedShift(void *ptr, size_t cl, void **batch, + size_t len) { + Unsupported(); + return 0; +} + +int PerCpuReadCycleCounter(int64_t *cycles) { + Unsupported(); + return -1; +} + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc + +#endif // !PERCPU_RSEQ_SUPPORTED_PLATFORM diff --git a/tcmalloc/internal/percpu_rseq_x86_64.S b/tcmalloc/internal/percpu_rseq_x86_64.S new file mode 100644 index 000000000..03f0cc7fe --- /dev/null +++ b/tcmalloc/internal/percpu_rseq_x86_64.S @@ -0,0 +1,591 @@ +/* + * Copyright 2019 The TCMalloc Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
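The stub functions above and the assembly entry points below share one handler contract: push paths take an OverflowHandler and pop paths an UnderflowHandler, and in the assembly these are reached by tail calls, so the handler's return value is passed straight back to the original caller. A sketch of those types, mirroring the pseudo-code comments further down in this file (the actual C++ declarations are not part of this excerpt):

    #include <cstddef>

    // Tail-called on overflow; its int result is what the push function returns.
    typedef int (*OverflowHandler)(int cpu, size_t cl, void* item);

    // Tail-called on underflow; its pointer result is what the pop function returns.
    typedef void* (*UnderflowHandler)(int cpu, size_t cl);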
+ */ + +#ifndef __x86_64__ +#error "percpu_rseq_x86_64.S should only be included for x86-64 builds" +#endif // __x86_64__ + +#include "tcmalloc/internal/percpu.h" + +/* + * API Exposition: + * + * METHOD_abort: // Emitted as part of START_RSEQ() + * START_RSEQ() // Starts critical section between [start,commit) + * METHOD_start: // Emitted as part of START_RSEQ() + * FETCH_CPU() // Reads current CPU + * ... + * single store // Commits sequence + * METHOD_commit: + * ...return... + * + * This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a + * (rodata) constant table, whose address is used to start the critical + * section, and the abort trampoline. + * + * The trampoline is used because: + * 1. Restarts are expected to be rare, so the extra jump when restarting is + * expected to be infrequent. + * 2. The upstream restartable sequence implementation expects the trailing 4 + * bytes of the abort PC to be "signed" (to prevent manipulation of the PC + * to an arbitrary choice). For us, this is PERCPU_RSEQ_SIGNATURE. This + * value is passed to the kernel during configuration of the rseq syscall. + * This would either need to be encoded as a nop (SIGN_ABORT) at the start + * of every restartable sequence, increasing instruction cache pressure, or + * placed directly before the entry point. + * + * The trampoline returns us to METHOD_abort, which is the normal entry point + * for the restartable sequence. Upon restart, the (upstream) kernel API + * clears the per-thread restartable sequence state. We return to METHOD_abort + * (rather than METHOD_start), as we need to reinitialize this value. + */ + +/* Place the code into the google_malloc section. This section is the heaviest + * user of Rseq code, so it makes sense to co-locate it. + */ + +.section google_malloc, "ax" + +/* ---------------- start helper macros ---------------- */ + +// This macro defines: +// * the rseq_cs instance that we'll use for label's critical section. +// * a trampoline to return to when we abort. This label_trampoline is +// distinct from label_start, as the return IP must be "signed" (see +// SIGN_ABORT()). +// +// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for +// relocations, but could be read-only for non-PIE builds. +#define DEFINE_UPSTREAM_CS(label) \ + .pushsection __rseq_cs, "aw"; \ + .balign 32; \ + .protected __rseq_cs_##label; \ + .type __rseq_cs_##label,@object; \ + .size __rseq_cs_##label,32; \ + __rseq_cs_##label: \ + .long PERCPU_RSEQ_VERSION, PERCPU_RSEQ_FLAGS; \ + .quad .L##label##_start; \ + .quad .L##label##_commit - .L##label##_start; \ + .quad label##_trampoline; \ + .reloc 0, R_X86_64_NONE, .L##label##array; \ + .popsection; \ + .pushsection __rseq_cs_ptr_array, "aw"; \ + .L##label##array: \ + .quad __rseq_cs_##label; \ + .popsection; \ + SIGN_ABORT(); \ + .globl label##_trampoline; \ + .type label##_trampoline, @function; \ +label##_trampoline: \ + .cfi_startproc; \ + jmp .L##label##_abort; \ + .cfi_endproc; \ + .size label##_trampoline, . - label##_trampoline; + +// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP +// must match PERCPU_RSEQ_SIGNATURE (as configured by our rseq syscall's +// signature parameter). This signature is used to annotate valid abort IPs +// (since rseq_cs could live in a user-writable segment). +// +// To allow this to be safely executed as a valid instruction, we encode the +// value with a nop. 
This is decoded as: +// +// nopl 0xSIGNATURE(%rip) +// +#define SIGN_ABORT() \ + .byte 0x0f, 0x1f, 0x05; \ + .long PERCPU_RSEQ_SIGNATURE; + +/* + * Provide a directive to specify the size of symbol "label", relative to the + * current location and its start. + */ +#define ENCODE_SIZE(label) .size label, . - label; + +/* + * Fetches the per-cpu data addressed by (%base) for cpu %cpu into %dest. + * + * %base must be distinct from %dest since the actual address is calculated in + * place using %cpu + */ +#define FETCH_PERCPU_DATA(cpu, base, dest) \ + movslq cpu, dest; \ + shlq $PERCPU_BYTES_PER_REGION_SHIFT, dest; \ + addq base, dest; + +/* + * Combines FETCH_CPU and FETCH_PERCPU_DATA above for the local cpu (which is + * also returned in %cpu). + */ +#define FETCH_LOCAL_PERCPU_DATA(cpu, base, dest) \ + FETCH_CPU(cpu); \ + FETCH_PERCPU_DATA(cpu, base, dest); + +/* In all non-position independent cases we need to use RIP-relative label + addresses */ +#if !defined(__PIC__) +#define LABEL_ADDR(label) $label +#else +#define LABEL_ADDR(label) label@GOTPCREL(%rip) +#endif /* !defined(__PIC__) */ + +/* With PIE; have initial-exec TLS, even in the presence of position + independent code. */ +#if !defined(__PIC__) || defined(__PIE__) +#define FETCH_CPU(dest) movl %fs:__rseq_abi@TPOFF+4, dest; +#define START_RSEQ(src) \ + .L##src##_abort: \ + leaq __rseq_cs_##src(%rip), %rax; \ + movq %rax, %fs:__rseq_abi@TPOFF+8; \ + .L##src##_start: + +#else /* !defined(__PIC__) || defined(__PIE__) */ + +/* + * FETCH_CPU assumes &__rseq_abi is in %rax. We cannot call + * tcmalloc_tls_fetch_pic at this point, as we have started our restartable + * sequence. If we are prempted there, the kernel will clear rseq_cs as + * tcmalloc_tls_fetch_pic does not appear in the restartable sequence's address + * range. + */ +#define FETCH_CPU(dest) \ + movl 4(%rax), dest; /* cpuid is 32-bits */ +#define START_RSEQ(src) \ + .L##src##_abort: \ + call tcmalloc_tls_fetch_pic@PLT; \ + leaq __rseq_cs_##src(%rip), %r11; \ + movq %r11, 8(%rax); \ + .L##src##_start: + +/* + * We can safely call this function from within an RSEQ section as it only + * generates a thread-local address which will not change across a missed + * restart. This must precede the construction of any preparatory state. + */ + .globl tcmalloc_tls_fetch_pic + .type tcmalloc_tls_fetch_pic, @function +tcmalloc_tls_fetch_pic: + .cfi_startproc + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + mov %rsp, %rbp + .cfi_def_cfa_register 6 + sub $0x30, %rsp + mov %rsi, -0x08(%rbp) /* atypical abi: tcmalloc_tls_fetch_pic preserves regs */ + mov %rdi, -0x10(%rbp) + mov %rdx, -0x18(%rbp) + mov %rcx, -0x20(%rbp) + mov %r8, -0x28(%rbp) + mov %r9, -0x30(%rbp) + /* + * Below is an optimized relocatable TLS lookup per ELF spec: + * http://www.akkadia.org/drepper/tls.pdf + * When possible, this is replaced at link-time with a call-free variant. 
+ */ + .byte 0x66; + leaq __rseq_abi@TLSGD(%rip), %rdi; + .word 0x6666; + rex64; + call __tls_get_addr@PLT; + mov -0x08(%rbp), %rsi + mov -0x10(%rbp), %rdi + mov -0x18(%rbp), %rdx + mov -0x20(%rbp), %rcx + mov -0x28(%rbp), %r8 + mov -0x30(%rbp), %r9 + add $0x30, %rsp + leave + .cfi_def_cfa_register 7 + .cfi_def_cfa_offset 8 + ret; /* &__rseq_abi in %rax */ + .cfi_endproc +#endif /* !defined(__PIC__) || defined(__PIE__) */ + +/* ---------------- end helper macros ---------------- */ + +/* start of atomic restartable sequences */ + +/* + * NOTE: We don't use cmpxchgq in the following functions since this would + make checking the success of our commit operation dependent on flags (which + * are in turn clobbered by the restart region) -- furthermore we can't just + * retry to fill in the flags since the restarted cmpxchg may have actually + * succeeded; spuriously failing subsequent attempts. + */ + +/* + * int TcmallocSlab_PerCpuCmpxchg64(int target_cpu, long *p, + * long old_val, long new_val) + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_PerCpuCmpxchg64 + .type TcmallocSlab_PerCpuCmpxchg64, @function +TcmallocSlab_PerCpuCmpxchg64: + .cfi_startproc +.LTcmallocSlab_PerCpuCmpxchg64_region0: + START_RSEQ(TcmallocSlab_PerCpuCmpxchg64); + FETCH_CPU(%eax); + cmp %eax, %edi; /* check cpu vs current_cpu */ + jne .LTcmallocSlab_PerCpuCmpxchg64_region1; + cmp %rdx, (%rsi); /* verify *p == old */ + jne .LTcmallocSlab_PerCpuCmpxchg64_region2; + mov %rcx, (%rsi); +.LTcmallocSlab_PerCpuCmpxchg64_region1: +.LTcmallocSlab_PerCpuCmpxchg64_commit: + ret; /* return current cpu, indicating mismatch OR success */ +.LTcmallocSlab_PerCpuCmpxchg64_region2: + mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */ + ret; +.LTcmallocSlab_PerCpuCmpxchg64_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_PerCpuCmpxchg64) +DEFINE_UPSTREAM_CS(TcmallocSlab_PerCpuCmpxchg64) + +/* typedef int (*OverflowHandler)( + * int cpu (%rdi), + * size_t cl (%rsi), + * void *item (%rdx)); + * int TcmallocSlab_Push( + * void *ptr (%rdi), + * size_t cl (%rsi), + * void* item (%rdx), + * size_t shift (%rcx), + * OverflowHandler f (%r8)) { + * uint64_t rax = __rseq_abi.cpu_id; + * Header* hdr = GetHeader(rdi, rax, cl); + * uint64_t r11 = hdr->current; + * uint64_t r9 = hdr->end; + * if (r11 < r9) { + * *(CpuMemoryStart(ptr, rax) + current * 8) = rdx; + * r11++; + * hdr->current = r11; + * return rax; + * } else { + * return f(rax, cl, item); + * } + * } + * + * By default assembler will not align these symbols at all (not even on 8). + * But their performance is significantly affected by starting position + * (i.e. +/-20%). It's not that they simply need to be aligned on a sufficient + * power of two. For example, offset 17 from a 64-byte aligned address worked + * good as well. Aligning on 64 seems to work good with this particular + * implementation, but more importantly it makes performance less affected by + * addition/removal of unrelated code somewhere else. 
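 *
 * Added illustration (an assumption inferred from the 8-byte stride and the
 * 0/4/6 offsets used by the Push/Pop fast paths, not a definition copied from
 * the C++ headers): each size class cl has an 8-byte header at
 * CpuMemoryStart(ptr, cpu) + cl * 8, roughly
 *
 *   struct Header {
 *     uint16_t current;  // offset 0: index one past the last filled slot
 *     uint16_t unused;   // offset 2: not consulted by these fast paths
 *     uint16_t begin;    // offset 4: lowest valid slot, checked by Pop
 *     uint16_t end;      // offset 6: capacity limit, checked by Push
 *   };
 *
 * Push stores `item` at the slot indexed by `current` within this CPU's
 * region and then bumps `current`; once current >= end it tail-calls the
 * overflow handler instead of committing.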
+ */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Push + .type TcmallocSlab_Push, @function +TcmallocSlab_Push: + .cfi_startproc +.LTcmallocSlab_Push_region0: + START_RSEQ(TcmallocSlab_Push); + FETCH_CPU(%eax); + mov %rax, %r10 + shl %cl, %r10; /* multiply cpu by cache size */ + lea (%rdi, %r10), %r10 + movzwq (%r10, %rsi, 8), %r11 /* current */ + movzwq 6(%r10, %rsi, 8), %r9 /* end */ + cmpq %r9, %r11; + jae .LTcmallocSlab_Push_region2; + movq %rdx, (%r10, %r11, 8); + inc %r11; + movw %r11w, (%r10, %rsi, 8); +.LTcmallocSlab_Push_region1: +.LTcmallocSlab_Push_commit: + ret; +.LTcmallocSlab_Push_region2: + mov %rax, %rdi; /* rdi = cpu */ +#ifdef GOOGLE3_RETPOLINE + mov %r8, %rcx; + jmp TcmallocOverflowThunk +#else // !GOOGLE3_RETPOLINE + jmpq *%r8; /* tail call to f(cpu, cl, item) */ +#endif // !GOOGLE3_RETPOLINE +.LTcmallocSlab_Push_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Push) +DEFINE_UPSTREAM_CS(TcmallocSlab_Push) + +/* + * Variant of function that uses a fixed shift + * int TcmallocSlab_Push_FixedShift( + * void *ptr (%rdi), + * size_t cl (%rsi), + * void* item (%rdx), + * OverflowHandler f (%rcx)) { // Note that this used to be %r8 + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Push_FixedShift + .type TcmallocSlab_Push_FixedShift, @function +TcmallocSlab_Push_FixedShift: + .cfi_startproc +.LTcmallocSlab_Push_FixedShift_region0: + START_RSEQ(TcmallocSlab_Push_FixedShift); + FETCH_CPU(%eax); + mov %rax, %r10 + shl $PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r10; /* multiply cpu by cache size */ + add %rdi, %r10 + movzwq (%r10, %rsi, 8), %r11 /* current */ + cmpw 6(%r10, %rsi, 8), %r11w /* end */ + jae .LTcmallocSlab_Push_FixedShift_region2; + movq %rdx, (%r10, %r11, 8); + inc %r11; + movw %r11w, (%r10, %rsi, 8); +.LTcmallocSlab_Push_FixedShift_region1: +.LTcmallocSlab_Push_FixedShift_commit: + ret; +.LTcmallocSlab_Push_FixedShift_region2: + mov %rax, %rdi; /* rdi = cpu */ +#ifdef GOOGLE3_RETPOLINE + jmp TcmallocOverflowThunk +#else // !GOOGLE3_RETPOLINE + jmpq *%rcx; /* tail call to f(cpu, cl, item) */ +#endif // !GOOGLE3_RETPOLINE +.LTcmallocSlab_Push_FixedShift_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Push_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Push_FixedShift) + +/* typedef void* (*UnderflowHandler*)( + * int cpu (%rdi), + * size_t cl (%rsi)); + * void* TcmallocSlab_Pop( + * void *ptr (%rdi), + * size_t cl (%rsi), + * UnderflowHandler f (%rdx), + * size_t shift (%rcx))) { + * uint64_t rax = __rseq_abi.cpu_id; + * Header* hdr = GetHeader(rdi, rax, cl); + * uint64_t r10 = hdr->current; + * uint64_t r9 = hdr->begin; + * if (r10 > r9) { + * uint64_t rax = *(CpuMemoryStart(ptr, rax) + current * 8 - 16); + * prefetcht0(rax); + * rax = *(CpuMemoryStart(ptr, rax) + current * 8 - 8); + * hdr->current--; + * return rax; + * } else { + * return f(rax, cl); + * } + * } + * + * A note about prefetcht0 in Pop (and Pop_FixedShift): While this prefetch + * may appear costly, trace analysis shows the target is frequently used + * (b/70294962). Stalling on a TLB miss at the prefetch site (which has no + * deps) and prefetching the line async is better than stalling at the use + * (which may have deps) to fill the TLB and the cache miss. 
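 *
 * Illustrative caller-side usage (the names here are assumptions, not code
 * from this patch):
 *
 *   void* item = TcmallocSlab_Pop(slabs, cl, &UnderflowCallback, shift);
 *
 * On the fast path the returned pointer is the popped item. When the size
 * class is empty, the handler is reached by a tail jump rather than a call,
 * so whatever the underflow handler returns is what the original caller of
 * TcmallocSlab_Pop receives.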
+ */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Pop + .type TcmallocSlab_Pop, @function +TcmallocSlab_Pop: + .cfi_startproc +.LTcmallocSlab_Pop_region0: + START_RSEQ(TcmallocSlab_Pop); + FETCH_CPU(%eax); + shl %cl, %rax; /* multiply cpu by cache size*/ + lea (%rdi, %rax), %r11; + movzwq (%r11, %rsi, 8), %r10; /* current */ + movzwq 4(%r11, %rsi, 8), %r9; /* begin */ + cmp %r9, %r10; + jbe .LTcmallocSlab_Pop_region2; + mov -16(%r11, %r10, 8), %rax; + prefetcht0 (%rax); + movq -8(%r11, %r10, 8), %rax; + decl (%r11, %rsi, 8); +.LTcmallocSlab_Pop_region1: +.LTcmallocSlab_Pop_commit: + ret; +.LTcmallocSlab_Pop_region2: + mov %rax, %rdi; /* rdi = cpu */ + shr %cl, %rdi; /* undo shift in the beginning of the function */ +#ifdef GOOGLE3_RETPOLINE + jmp TcmallocUnderflowThunk; +#else // !GOOGLE3_RETPOLINE + jmp *%rdx; /* tail call to f(cpu, cl) */ +#endif // !GOOGLE3_RETPOLINE +.LTcmallocSlab_Pop_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Pop) +DEFINE_UPSTREAM_CS(TcmallocSlab_Pop) + +/* + * Variant of function that uses a fixed shift + * void* TcmallocSlab_Pop_FixedShift( + * void *ptr (%rdi), + * size_t cl (%rsi), + * UnderflowHandler f (%rdx) { + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Pop_FixedShift + .type TcmallocSlab_Pop_FixedShift, @function +TcmallocSlab_Pop_FixedShift: + .cfi_startproc +.LTcmallocSlab_Pop_FixedShift_region0: + START_RSEQ(TcmallocSlab_Pop_FixedShift); + FETCH_CPU(%eax); + shl $PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %rax; /* multiply cpu by cache size*/ + lea (%rdi, %rax), %r11; + movzwq (%r11, %rsi, 8), %r10; /* current */ + cmpw 4(%r11, %rsi, 8), %r10w /* begin */ + jbe .LTcmallocSlab_Pop_FixedShift_region2; + mov -16(%r11, %r10, 8), %rax; + prefetcht0 (%rax); + movq -8(%r11, %r10, 8), %rax; + decl (%r11, %rsi, 8); +.LTcmallocSlab_Pop_FixedShift_region1: +.LTcmallocSlab_Pop_FixedShift_commit: + ret; +.LTcmallocSlab_Pop_FixedShift_region2: + mov %rax, %rdi; /* rdi = cpu */ + shr $PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %rdi; /* undo shift in the beginning of the function */ +#ifdef GOOGLE3_RETPOLINE + jmp TcmallocUnderflowThunk; +#else // !GOOGLE3_RETPOLINE + jmp *%rdx; /* tail call to f(cpu, cl) */ +#endif +.LTcmallocSlab_Pop_FixedShift_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Pop_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Pop_FixedShift) + +/* size_t TcmallocSlab_PushBatch_FixedShift( + * void *ptr (%rdi), + * size_t cl (%rsi), + * void** batch (%rdx), + * size_t len (%rcx) { + * uint64_t r8 = __rseq_abi.cpu_id; + * uint64_t* r8 = CpuMemoryStart(rdi, r8); + * Header* hdr = r8 + rsi * 8; + * uint64_t r9 = hdr->current; + * uint64_t r10 = hdr->end; + * if (r9 >= r10) return 0; + * r11 = rcx; + * r10 = r9 + min(rcx, r10 - r9); + * loop: + * r11--; + * rax = batch[r11]; + * *(r8 + r9 * 8) = rax; + * r9++; + * if (r9 != r10) goto loop; + * hdr->current = r9; + * return rcx - r11; + * } + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_PushBatch_FixedShift + .type TcmallocSlab_PushBatch_FixedShift, @function +TcmallocSlab_PushBatch_FixedShift: + .cfi_startproc +.LTcmallocSlab_PushBatch_FixedShift_region0: + START_RSEQ(TcmallocSlab_PushBatch_FixedShift); + FETCH_CPU(%r8d); + shl $PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; /* multiply cpu by 256k */ + lea (%rdi, %r8), %r8; + movzwq (%r8, %rsi, 8), %r9; /* current */ + movzwq 6(%r8, %rsi, 8), %r10; /* end */ + cmpq %r10, %r9; + jae .LTcmallocSlab_PushBatch_FixedShift_region2; + movq %rcx, %r11; /* r11 = copy of len */ + subq %r9, %r10; 
/* r10 = free capacity */ + cmpq %rcx, %r10; + cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */ + addq %r9, %r10; +.LTcmallocSlab_PushBatch_FixedShift_loop: + decq %r11; + movq (%rdx, %r11, 8), %rax; + movq %rax, (%r8, %r9, 8); + incq %r9; + cmpq %r9, %r10; + jne .LTcmallocSlab_PushBatch_FixedShift_loop + movq %rcx, %rax; + subq %r11, %rax; + movw %r9w, (%r8, %rsi, 8); +.LTcmallocSlab_PushBatch_FixedShift_region1: +.LTcmallocSlab_PushBatch_FixedShift_commit: + ret; +.LTcmallocSlab_PushBatch_FixedShift_region2: + xor %rax, %rax; + ret; +.LTcmallocSlab_PushBatch_FixedShift_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_PushBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_PushBatch_FixedShift) + +/* size_t TcmallocSlab_PopBatch_FixedShift( + * void *ptr (%rdi), + * size_t cl (%rsi), + * void** batch (%rdx), + * size_t len (%rcx) { + * uint64_t r8 = __rseq_abi.cpu_id; + * uint64_t* r8 = CpuMemoryStart(rdi, r8); + * Header* hdr = GetHeader(rdi, rax, cl); + * uint64_t r9 = hdr->current; + * uint64_t r10 = hdr->begin; + * if (r9 <= r10) return 0; + * r11 = min(rcx, r9 - r10); + * rax = 0; + * loop: + * r9--; + * r10 = *(r8 + r9 * 8); + * batch[rax] = r10; + * rax++; + * if (rax != r11) goto loop; + * hdr->current = r9; + * return rax; + * } + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_PopBatch_FixedShift + .type TcmallocSlab_PopBatch_FixedShift, @function +TcmallocSlab_PopBatch_FixedShift: + .cfi_startproc +.LTcmallocSlab_PopBatch_FixedShift_region0: + START_RSEQ(TcmallocSlab_PopBatch_FixedShift); + FETCH_CPU(%r8d); + shl $PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; /* multiply cpu by 256k */ + lea (%rdi, %r8), %r8; + movzwq (%r8, %rsi, 8), %r9; /* current */ + movzwq 4(%r8, %rsi, 8), %r10; /* begin */ + cmp %r10, %r9; + jbe .LTcmallocSlab_PopBatch_FixedShift_region2; + movq %r9, %r11; + subq %r10, %r11; /* r11 = available items */ + cmpq %rcx, %r11; + cmovaq %rcx, %r11; /* r11 = min(len, available items) */ + xorq %rax, %rax; +.LTcmallocSlab_PopBatch_FixedShift_loop: + decq %r9; + movq (%r8, %r9, 8), %r10; + movq %r10, (%rdx, %rax, 8); + incq %rax; + cmpq %rax, %r11; + jne .LTcmallocSlab_PopBatch_FixedShift_loop + movw %r9w, (%r8, %rsi, 8); +.LTcmallocSlab_PopBatch_FixedShift_region1: +.LTcmallocSlab_PopBatch_FixedShift_commit: + ret; +.LTcmallocSlab_PopBatch_FixedShift_region2: + xor %rax, %rax; + ret; +.LTcmallocSlab_PopBatch_FixedShift_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_PopBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_PopBatch_FixedShift) + +.section .note.GNU-stack,"",@progbits diff --git a/tcmalloc/internal/proc_maps.cc b/tcmalloc/internal/proc_maps.cc new file mode 100644 index 000000000..a8d00d96a --- /dev/null +++ b/tcmalloc/internal/proc_maps.cc @@ -0,0 +1,169 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
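As an aside added for orientation (the sample values are invented): each line of /proc/<pid>/task/<tid>/maps has the shape that NextExt() below pulls apart with sscanf, for example

    7f2c3a400000-7f2c3a600000 r-xp 00000000 08:01 1048577    /usr/lib/libfoo.so

A minimal stand-alone sketch of the same parse, using the format string NextExt() relies on:

    #include <cstdio>

    int main() {
      const char* line =
          "7f2c3a400000-7f2c3a600000 r-xp 00000000 08:01 1048577 /usr/lib/libfoo.so";
      unsigned long long start, end, offset;
      long long inode;
      char flags[5];  // e.g. "r-xp"; %4s needs room for the terminator
      unsigned major, minor;
      int filename_offset = 0;
      int fields = std::sscanf(line, "%llx-%llx %4s %llx %x:%x %lld %n", &start,
                               &end, flags, &offset, &major, &minor, &inode,
                               &filename_offset);
      // fields == 7 on success; %n is not counted, it only records where the
      // (optional) trailing filename starts.
      std::printf("%d fields, file=%s\n", fields, line + filename_offset);
    }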
+ +#include "tcmalloc/internal/proc_maps.h" + +#include +#include +#include + +#include +#include + +#include "absl/strings/str_format.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +ProcMapsIterator::ProcMapsIterator(pid_t pid) { Init(pid, nullptr); } + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer* buffer) { + Init(pid, buffer); +} + +void ProcMapsIterator::Init(pid_t pid, Buffer* buffer) { + if (pid == 0) { + pid = getpid(); + } + + pid_ = pid; + if (!buffer) { + // If the user didn't pass in any buffer storage, allocate it + // now. This is the normal case; the signal handler passes in a + // static buffer. + buffer = dynamic_buffer_ = new Buffer; + } else { + dynamic_buffer_ = nullptr; + } + + ibuf_ = buffer->buf; + + stext_ = etext_ = nextline_ = ibuf_; + ebuf_ = ibuf_ + Buffer::kBufSize - 1; + nextline_ = ibuf_; + +#if defined(__linux__) + // /maps exists in two places: /proc/pid/ and /proc/pid/task/tid (for each + // thread in the process.) The only difference between these is the "global" + // view (/proc/pid/maps) attempts to label each VMA which is the stack of a + // thread. This is nice to have, but not critical, and scales quadratically. + // Use the main thread's "local" view to ensure adequate performance. + int path_length = absl::SNPrintF(ibuf_, Buffer::kBufSize, + "/proc/%d/task/%d/maps", pid, pid); + CHECK_CONDITION(path_length < Buffer::kBufSize); + + // No error logging since this can be called from the crash dump + // handler at awkward moments. Users should call Valid() before + // using. + TEMP_FAILURE_RETRY(fd_ = open(ibuf_, O_RDONLY)); +#else + fd_ = -1; // so Valid() is always false +#endif +} + +ProcMapsIterator::~ProcMapsIterator() { + // As it turns out, Linux guarantees that close() does in fact close a file + // descriptor even when the return value is EINTR. According to the notes in + // the manpage for close(2), this is widespread yet not fully portable, which + // is unfortunate. POSIX explicitly leaves this behavior as unspecified. + if (fd_ >= 0) close(fd_); + delete dynamic_buffer_; +} + +bool ProcMapsIterator::Valid() const { + return fd_ != -1; +} + +bool ProcMapsIterator::NextExt(uint64_t* start, uint64_t* end, char** flags, + uint64_t* offset, int64_t* inode, char** filename, + dev_t* dev) { +#if defined __linux__ + do { + // Advance to the start of the next line + stext_ = nextline_; + + // See if we have a complete line in the buffer already + nextline_ = static_cast(memchr(stext_, '\n', etext_ - stext_)); + if (!nextline_) { + // Shift/fill the buffer so we do have a line + int count = etext_ - stext_; + + // Move the current text to the start of the buffer + memmove(ibuf_, stext_, count); + stext_ = ibuf_; + etext_ = ibuf_ + count; + + int nread = 0; // fill up buffer with text + while (etext_ < ebuf_) { + TEMP_FAILURE_RETRY(nread = read(fd_, etext_, ebuf_ - etext_)); + if (nread > 0) + etext_ += nread; + else + break; + } + + // Zero out remaining characters in buffer at EOF to avoid returning + // garbage from subsequent calls. + if (etext_ != ebuf_ && nread == 0) { + memset(etext_, 0, ebuf_ - etext_); + } + *etext_ = '\n'; // sentinel; safe because ibuf extends 1 char beyond ebuf + nextline_ = static_cast(memchr(stext_, '\n', etext_ + 1 - stext_)); + } + *nextline_ = 0; // turn newline into nul + nextline_ += ((nextline_ < etext_) ? 
1 : 0); // skip nul if not end of text + // stext_ now points at a nul-terminated line + unsigned long long tmpstart, tmpend, tmpoffset; // NOLINT + long long tmpinode, local_inode; // NOLINT + unsigned long long local_start, local_end, local_offset; // NOLINT + int major, minor; + unsigned filename_offset = 0; + // for now, assume all linuxes have the same format + int para_num = + sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n", + start ? &local_start : &tmpstart, end ? &local_end : &tmpend, + flags_, offset ? &local_offset : &tmpoffset, &major, &minor, + inode ? &local_inode : &tmpinode, &filename_offset); + + if (para_num != 7) continue; + + if (start) *start = local_start; + if (end) *end = local_end; + if (offset) *offset = local_offset; + if (inode) *inode = local_inode; + // Depending on the Linux kernel being used, there may or may not be a space + // after the inode if there is no filename. sscanf will in such situations + // nondeterministically either fill in filename_offset or not (the results + // differ on multiple calls in the same run even with identical arguments). + // We don't want to wander off somewhere beyond the end of the string. + size_t stext_length = strlen(stext_); + if (filename_offset == 0 || filename_offset > stext_length) + filename_offset = stext_length; + + // We found an entry + if (flags) *flags = flags_; + if (filename) *filename = stext_ + filename_offset; + if (dev) *dev = makedev(major, minor); + + return true; + } while (etext_ > ibuf_); +#endif + + // We didn't find anything + return false; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/tcmalloc/internal/proc_maps.h b/tcmalloc/internal/proc_maps.h new file mode 100644 index 000000000..17a148ed6 --- /dev/null +++ b/tcmalloc/internal/proc_maps.h @@ -0,0 +1,66 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_PROC_MAPS_H_ +#define TCMALLOC_INTERNAL_PROC_MAPS_H_ + +#include +#include +#include + +namespace tcmalloc { +namespace tcmalloc_internal { + +// A ProcMapsIterator abstracts access to /proc/maps for a given process. +class ProcMapsIterator { + public: + struct Buffer { + static const size_t kBufSize = PATH_MAX + 1024; + char buf[kBufSize]; + }; + + // Create a new iterator for the specified pid. pid can be 0 for "self". + explicit ProcMapsIterator(pid_t pid); + + // Create an iterator with specified storage (for use in signal handler). + // "buffer" should point to a ProcMapsIterator::Buffer buffer can be null in + // which case a buffer will be allocated. 
+ ProcMapsIterator(pid_t pid, Buffer* buffer); + + // Returns true if the iterator successfully initialized; + bool Valid() const; + + bool NextExt(uint64_t* start, uint64_t* end, char** flags, uint64_t* offset, + int64_t* inode, char** filename, dev_t* dev); + + ~ProcMapsIterator(); + + private: + void Init(pid_t pid, Buffer* buffer); + + char* ibuf_; // input buffer + char* stext_; // start of text + char* etext_; // end of text + char* nextline_; // start of next line + char* ebuf_; // end of buffer (1 char for a nul) + int fd_; // filehandle on /proc/*/maps + pid_t pid_; + char flags_[10]; + Buffer* dynamic_buffer_; // dynamically-allocated Buffer +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_PROC_MAPS_H_ diff --git a/tcmalloc/internal/range_tracker.h b/tcmalloc/internal/range_tracker.h new file mode 100644 index 000000000..f25426d4d --- /dev/null +++ b/tcmalloc/internal/range_tracker.h @@ -0,0 +1,526 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_RANGE_TRACKER_H_ +#define TCMALLOC_RANGE_TRACKER_H_ + +#include +#include +#include + +#include +#include +#include + +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +// Helpers for below +class Bitops { + public: + // Returns the zero-indexed first set bit of a nonzero word. + // i.e. FindFirstSet(~0) = FindFirstSet(1) = 0, FindFirstSet(1 << 63) = 63. + // REQUIRES: word != 0. + static inline size_t FindFirstSet(size_t word); + + // Returns the zero-indexed last set bit of a nonzero word. + // i.e. FindLastSet(~0) = FindLastSet(1 << 63) = 63, FindLastSet(1) = 0. + // REQUIRES: word != 0. + static inline size_t FindLastSet(size_t word); + + // Returns the count of set bits in word. + static inline size_t CountSetBits(size_t word); +}; + +// Keeps a bitmap of some fixed size (N bits). +template +class Bitmap { + public: + constexpr Bitmap() : bits_{} {} + + size_t size() const { return N; } + bool GetBit(size_t i) const; + + void SetBit(size_t i); + void ClearBit(size_t i); + + // Returns the number of set bits [index, ..., index + n - 1]. + size_t CountBits(size_t index, size_t n) const; + + // Equivalent to SetBit on bits [index, index + 1, ... index + n - 1]. + void SetRange(size_t index, size_t n); + void ClearRange(size_t index, size_t n); + + // If there is at least one free range at or after , + // put it in *index, *length and return true; else return false. + bool NextFreeRange(size_t start, size_t *index, size_t *length) const; + + // Returns index of the first {true, false} bit >= index, or N if none. + size_t FindSet(size_t index) const; + size_t FindClear(size_t index) const; + + // Returns index of the first {set, clear} bit in [index, 0] or -1 if none. 
+ ssize_t FindSetBackwards(size_t index) const; + ssize_t FindClearBackwards(size_t index) const; + + void Clear(); + + private: + static constexpr size_t kWordSize = sizeof(size_t) * 8; + static constexpr size_t kWords = (N + kWordSize - 1) / kWordSize; + static constexpr size_t kDeadBits = kWordSize * kWords - N; + + size_t bits_[kWords]; + + size_t CountWordBits(size_t i, size_t from, size_t to) const; + + template + void SetWordBits(size_t i, size_t from, size_t to); + template + void SetRangeValue(size_t index, size_t n); + + template + size_t FindValue(size_t index) const; + template + ssize_t FindValueBackwards(size_t index) const; +}; + +// Tracks allocations in a range of items of fixed size. Supports +// finding an unset range of a given length, while keeping track of +// the largest remaining unmarked length. +template +class RangeTracker { + public: + constexpr RangeTracker() + : bits_{}, longest_free_(N), nused_(0), nallocs_(0) {} + + size_t size() const; + // Number of bits marked + size_t used() const; + // Number of bits clear + size_t total_free() const; + // Longest contiguous range of clear bits. + size_t longest_free() const; + // Count of live allocations. + size_t allocs() const; + + // REQUIRES: there is a free range of at least n bits + // (i.e. n <= longest_free()) + // finds and marks n free bits, returning index of the first bit. + // Chooses by best fit. + size_t FindAndMark(size_t n); + + // REQUIRES: the range [index, index + n) is fully marked, and + // was the returned value from a call to FindAndMark. + // Unmarks it. + void Unmark(size_t index, size_t n); + // If there is at least one free range at or after , + // put it in *index, *length and return true; else return false. + bool NextFreeRange(size_t start, size_t *index, size_t *length) const; + + void Clear(); + + private: + Bitmap bits_; + + // Computes the smallest unsigned type that can hold the constant N. + class UnsignedTypeFittingSize { + private: + static_assert(N <= std::numeric_limits::max(), + "size_t more than 64 bits??"); + template + static constexpr bool Fit() { + return N <= std::numeric_limits::max(); + } + struct U32 { + using type = + typename std::conditional(), uint32_t, uint64_t>::type; + }; + + struct U16 { + using type = typename std::conditional(), uint16_t, + typename U32::type>::type; + }; + + struct U8 { + using type = typename std::conditional(), uint8_t, + typename U16::type>::type; + }; + + public: + using type = typename U8::type; + }; + + // we keep various stats in the range [0, N]; make them as small as possible. + using Count = typename UnsignedTypeFittingSize::type; + + Count longest_free_; + Count nused_; + Count nallocs_; +}; + +template +inline size_t RangeTracker::size() const { + return bits_.size(); +} + +template +inline size_t RangeTracker::used() const { + return nused_; +} + +template +inline size_t RangeTracker::total_free() const { + return N - used(); +} + +template +inline size_t RangeTracker::longest_free() const { + return longest_free_; +} + +template +inline size_t RangeTracker::allocs() const { + return nallocs_; +} + +template +inline size_t RangeTracker::FindAndMark(size_t n) { + // We keep the two longest ranges in the bitmap since we might allocate + // from one. + size_t longest_len = 0; + size_t second_len = 0; + + // the best (shortest) range we could use + // TODO(b/134691947): shortest? lowest-addressed? 
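  // Worked example, added for illustration (not in the original source): with
  // free runs of lengths 5, 3 and 9 and a request of n = 4, the scan below
  // settles on the length-5 run, the shortest one that still fits, and
  // longest_free_ stays 9 because the longest run was untouched. With n = 6
  // instead, only the length-9 run fits, and afterwards longest_free_ becomes
  // max(9 - 6, 5) = 5.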
+ size_t best_index = N; + size_t best_len = 2 * N; + // Iterate over free ranges: + size_t index = 0, len; + + while (bits_.NextFreeRange(index, &index, &len)) { + if (len > longest_len) { + second_len = longest_len; + longest_len = len; + } else if (len > second_len) { + second_len = len; + } + + if (len >= n && len < best_len) { + best_index = index; + best_len = len; + } + + index += len; + } + + CHECK_CONDITION(best_index < N); + bits_.SetRange(best_index, n); + + if (best_len == longest_len) { + longest_len -= n; + if (longest_len < second_len) longest_len = second_len; + } + + longest_free_ = longest_len; + nused_ += n; + nallocs_++; + return best_index; +} + +// REQUIRES: the range [index, index + n) is fully marked. +// Unmarks it. +template +inline void RangeTracker::Unmark(size_t index, size_t n) { + ASSERT(bits_.FindClear(index) >= index + n); + bits_.ClearRange(index, n); + nused_ -= n; + nallocs_--; + + // We just opened up a new free range--it might be the longest. + size_t lim = bits_.FindSet(index + n - 1); + index = bits_.FindSetBackwards(index) + 1; + n = lim - index; + if (n > longest_free()) { + longest_free_ = n; + } +} + +// If there is at least one free range at or after , +// put it in *index, *length and return true; else return false. +template +inline bool RangeTracker::NextFreeRange(size_t start, size_t *index, + size_t *length) const { + return bits_.NextFreeRange(start, index, length); +} + +template +inline void RangeTracker::Clear() { + bits_.Clear(); + nallocs_ = 0; + nused_ = 0; + longest_free_ = N; +} + +// Count the set bits [from, to) in the i-th word to Value. +template +inline size_t Bitmap::CountWordBits(size_t i, size_t from, size_t to) const { + ASSERT(from < kWordSize); + ASSERT(to <= kWordSize); + const size_t all_ones = ~static_cast(0); + // how many bits are we setting? + const size_t n = to - from; + ASSERT(0 < n && n <= kWordSize); + const size_t mask = (all_ones >> (kWordSize - n)) << from; + + return Bitops::CountSetBits(bits_[i] & mask); +} + +// Set the bits [from, to) in the i-th word to Value. +template +template +inline void Bitmap::SetWordBits(size_t i, size_t from, size_t to) { + ASSERT(from < kWordSize); + ASSERT(to <= kWordSize); + const size_t all_ones = ~static_cast(0); + // how many bits are we setting? + const size_t n = to - from; + ASSERT(n > 0 && n <= kWordSize); + const size_t mask = (all_ones >> (kWordSize - n)) << from; + if (Value) { + bits_[i] |= mask; + } else { + bits_[i] &= ~mask; + } +} + +inline size_t Bitops::FindFirstSet(size_t word) { + static_assert(sizeof(size_t) == sizeof(unsigned int) || + sizeof(size_t) == sizeof(unsigned long) || + sizeof(size_t) == sizeof(unsigned long long), + "Unexpected size_t size"); + + // Previously, we relied on inline assembly to implement this function for + // x86. Relying on the compiler built-ins reduces the amount of architecture + // specific code. + // + // This does leave an false dependency errata + // (https://bugs.llvm.org/show_bug.cgi?id=33869#c24), but that is more easily + // addressed by the compiler than TCMalloc. 
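  // Added example (not in the original): for word == 0b101000, FindFirstSet
  // returns 3 and FindLastSet returns 5, i.e. the zero-indexed positions of
  // the lowest and highest set bits.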
+ ASSUME(word != 0); + if (sizeof(size_t) == sizeof(unsigned int)) { + return __builtin_ctz(word); + } else if (sizeof(size_t) == sizeof(unsigned long)) { // NOLINT + return __builtin_ctzl(word); + } else { + return __builtin_ctzll(word); + } +} + +inline size_t Bitops::FindLastSet(size_t word) { + static_assert(sizeof(size_t) == sizeof(unsigned int) || + sizeof(size_t) == sizeof(unsigned long) || + sizeof(size_t) == sizeof(unsigned long long), + "Unexpected size_t size"); + + ASSUME(word != 0); + if (sizeof(size_t) == sizeof(unsigned int)) { + return (CHAR_BIT * sizeof(word) - 1) - __builtin_clz(word); + } else if (sizeof(size_t) == sizeof(unsigned long)) { // NOLINT + return (CHAR_BIT * sizeof(word) - 1) - __builtin_clzl(word); + } else { + return (CHAR_BIT * sizeof(word) - 1) - __builtin_clzll(word); + } +} + +inline size_t Bitops::CountSetBits(size_t word) { + static_assert(sizeof(size_t) == sizeof(unsigned int) || + sizeof(size_t) == sizeof(unsigned long) || // NOLINT + sizeof(size_t) == sizeof(unsigned long long), // NOLINT + "Unexpected size_t size"); + + if (sizeof(size_t) == sizeof(unsigned int)) { + return __builtin_popcount(word); + } else if (sizeof(size_t) == sizeof(unsigned long)) { // NOLINT + return __builtin_popcountl(word); + } else { + return __builtin_popcountll(word); + } +} + +template +inline bool Bitmap::GetBit(size_t i) const { + size_t word = i / kWordSize; + size_t offset = i % kWordSize; + return bits_[word] & (size_t{1} << offset); +} + +template +inline void Bitmap::SetBit(size_t i) { + size_t word = i / kWordSize; + size_t offset = i % kWordSize; + bits_[word] |= (size_t{1} << offset); +} + +template +inline void Bitmap::ClearBit(size_t i) { + size_t word = i / kWordSize; + size_t offset = i % kWordSize; + bits_[word] &= ~(size_t{1} << offset); +} + +template +inline size_t Bitmap::CountBits(size_t index, size_t n) const { + ASSERT(index + n <= N); + size_t count = 0; + if (n == 0) { + return count; + } + + size_t word = index / kWordSize; + size_t offset = index % kWordSize; + size_t k = std::min(offset + n, kWordSize); + count += CountWordBits(word, offset, k); + n -= k - offset; + while (n > 0) { + word++; + k = std::min(n, kWordSize); + count += CountWordBits(word, 0, k); + n -= k; + } + + return count; +} + +template +inline void Bitmap::SetRange(size_t index, size_t n) { + SetRangeValue(index, n); +} + +template +inline void Bitmap::ClearRange(size_t index, size_t n) { + SetRangeValue(index, n); +} + +template +template +inline void Bitmap::SetRangeValue(size_t index, size_t n) { + ASSERT(index + n <= N); + size_t word = index / kWordSize; + size_t offset = index % kWordSize; + size_t k = offset + n; + if (k > kWordSize) k = kWordSize; + SetWordBits(word, offset, k); + n -= k - offset; + while (n > 0) { + word++; + k = n; + if (k > kWordSize) k = kWordSize; + SetWordBits(word, 0, k); + n -= k; + } +} + +template +inline bool Bitmap::NextFreeRange(size_t start, size_t *index, + size_t *length) const { + if (start >= N) return false; + size_t i = FindClear(start); + if (i == N) return false; + size_t j = FindSet(i); + *index = i; + *length = j - i; + return true; +} + +template +inline size_t Bitmap::FindSet(size_t index) const { + return FindValue(index); +} + +template +inline size_t Bitmap::FindClear(size_t index) const { + return FindValue(index); +} + +template +inline ssize_t Bitmap::FindSetBackwards(size_t index) const { + return FindValueBackwards(index); +} + +template +inline ssize_t Bitmap::FindClearBackwards(size_t index) const { + return 
FindValueBackwards(index); +} + +template +inline void Bitmap::Clear() { + for (int i = 0; i < kWords; ++i) { + bits_[i] = 0; + } +} + +template +template +inline size_t Bitmap::FindValue(size_t index) const { + size_t offset = index % kWordSize; + size_t word = index / kWordSize; + size_t here = bits_[word]; + if (!Goal) here = ~here; + size_t mask = ~static_cast(0) << offset; + here &= mask; + while (here == 0) { + ++word; + if (word >= kWords) { + return N; + } + here = bits_[word]; + if (!Goal) here = ~here; + } + + word *= kWordSize; + size_t ret = Bitops::FindFirstSet(here) + word; + if (kDeadBits > 0) { + if (ret > N) ret = N; + } + return ret; +} + +template +template +inline ssize_t Bitmap::FindValueBackwards(size_t index) const { + size_t offset = index % kWordSize; + ssize_t word = index / kWordSize; + size_t here = bits_[word]; + if (!Goal) here = ~here; + size_t mask = (static_cast(2) << offset) - 1; + here &= mask; + while (here == 0) { + --word; + if (word < 0) { + return -1; + } + here = bits_[word]; + if (!Goal) here = ~here; + } + + word *= kWordSize; + size_t ret = Bitops::FindLastSet(here) + word; + return ret; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_RANGE_TRACKER_H_ diff --git a/tcmalloc/internal/range_tracker_test.cc b/tcmalloc/internal/range_tracker_test.cc new file mode 100644 index 000000000..b6596dc4d --- /dev/null +++ b/tcmalloc/internal/range_tracker_test.cc @@ -0,0 +1,271 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/range_tracker.h" + +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/container/fixed_array.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" + +namespace tcmalloc { +namespace { + +using testing::ElementsAre; +using testing::Pair; + +class BitmapTest : public testing::Test { + protected: + template + std::vector FindSetResults(const Bitmap &map) { + return FindResults(map); + } + + template + std::vector FindClearResults(const Bitmap &map) { + return FindResults(map); + } + + template + std::vector FindResults(const Bitmap &map) { + std::vector results; + ssize_t last = -1; + for (size_t i = 0; i < N; ++i) { + ssize_t j = Value ? map.FindSet(i) : map.FindClear(i); + EXPECT_LE(last, j) << i; + EXPECT_LE(i, j) << i; + EXPECT_GE(N, j) << i; + if (last != j) { + results.push_back(j); + last = j; + } + } + + return results; + } + + template + std::vector FindSetResultsBackwards(const Bitmap &map) { + return FindResultsBackwards(map); + } + + template + std::vector FindClearResultsBackwards(const Bitmap &map) { + return FindResultsBackwards(map); + } + + template + std::vector FindResultsBackwards(const Bitmap &map) { + std::vector results; + ssize_t last = N; + for (ssize_t i = N - 1; i >= 0; --i) { + ssize_t j = Value ? 
map.FindSetBackwards(i) : map.FindClearBackwards(i); + EXPECT_GE(last, j) << i; + EXPECT_GE(i, j) << i; + EXPECT_LE(-1, j) << i; + if (last != j) { + results.push_back(j); + last = j; + } + } + + return results; + } +}; + +TEST_F(BitmapTest, GetBitEmpty) { + Bitmap<253> map; + for (size_t i = 0; i < map.size(); ++i) { + EXPECT_EQ(map.GetBit(i), 0); + } +} + +TEST_F(BitmapTest, GetBitOneSet) { + const size_t N = 251; + for (size_t s = 0; s < N; s++) { + Bitmap map; + map.SetBit(s); + for (size_t i = 0; i < map.size(); ++i) { + EXPECT_EQ(map.GetBit(i), i == s ? 1 : 0); + } + } +} + +TEST_F(BitmapTest, FindSet) { + Bitmap<253> map; + EXPECT_THAT(FindSetResults(map), ElementsAre(253)); + EXPECT_THAT(FindSetResultsBackwards(map), ElementsAre(-1)); + map.SetBit(7); + map.SetBit(14); + map.SetBit(15); + map.SetBit(63); + map.SetBit(128); + EXPECT_THAT(FindSetResults(map), ElementsAre(7, 14, 15, 63, 128, 253)); + EXPECT_THAT(FindSetResultsBackwards(map), + ElementsAre(128, 63, 15, 14, 7, -1)); + map.SetBit(195); + map.SetBit(196); + map.SetBit(251); + map.SetBit(252); + EXPECT_THAT(FindSetResults(map), + ElementsAre(7, 14, 15, 63, 128, 195, 196, 251, 252)); + EXPECT_THAT(FindSetResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, -1)); + map.SetBit(0); + EXPECT_THAT(FindSetResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, 0)); +} + +TEST_F(BitmapTest, FindClear) { + Bitmap<253> map; + map.SetRange(0, 253); + EXPECT_THAT(FindClearResults(map), ElementsAre(253)); + EXPECT_THAT(FindClearResultsBackwards(map), ElementsAre(-1)); + + map.ClearBit(7); + map.ClearBit(14); + map.ClearBit(15); + map.ClearBit(63); + map.ClearBit(128); + EXPECT_THAT(FindClearResults(map), ElementsAre(7, 14, 15, 63, 128, 253)); + EXPECT_THAT(FindClearResultsBackwards(map), + ElementsAre(128, 63, 15, 14, 7, -1)); + map.ClearBit(195); + map.ClearBit(196); + map.ClearBit(251); + map.ClearBit(252); + EXPECT_THAT(FindClearResults(map), + ElementsAre(7, 14, 15, 63, 128, 195, 196, 251, 252)); + EXPECT_THAT(FindClearResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, -1)); + map.ClearBit(0); + EXPECT_THAT(FindClearResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, 0)); +} + +TEST_F(BitmapTest, CountBits) { + Bitmap<253> map; + map.SetRange(0, 253); + EXPECT_EQ(map.CountBits(0, 253), 253); + EXPECT_EQ(map.CountBits(8, 245), 245); + EXPECT_EQ(map.CountBits(0, 250), 250); + + map.ClearBit(7); + map.ClearBit(14); + map.ClearBit(15); + map.ClearBit(63); + map.ClearBit(128); + + EXPECT_EQ(map.CountBits(0, 253), 248); + EXPECT_EQ(map.CountBits(8, 245), 241); + EXPECT_EQ(map.CountBits(0, 250), 245); + + map.ClearBit(195); + map.ClearBit(196); + map.ClearBit(251); + map.ClearBit(252); + + EXPECT_EQ(map.CountBits(0, 253), 244); + EXPECT_EQ(map.CountBits(8, 245), 237); + EXPECT_EQ(map.CountBits(0, 250), 243); + + map.ClearBit(0); + + EXPECT_EQ(map.CountBits(0, 253), 243); + EXPECT_EQ(map.CountBits(8, 245), 237); + EXPECT_EQ(map.CountBits(0, 250), 242); +} + +TEST_F(BitmapTest, CountBitsFuzz) { + static constexpr size_t kBits = 253; + absl::FixedArray truth(kBits); + Bitmap map; + + absl::BitGen rng; + for (int i = 0; i < kBits; i++) { + bool v = absl::Bernoulli(rng, 0.3); + truth[i] = v; + if (v) { + map.SetBit(i); + } + } + + for (int i = 0; i < 100; i++) { + SCOPED_TRACE(i); + + // Pick a random starting point and a length, use a naive loop against truth + // to calculate the expected bit count. 
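For readers skimming the fixture above, a short hedged sketch of the find semantics it depends on: forward scans return N when nothing is found, backward scans return -1. The snippet assumes only the Bitmap API added by this patch; the wrapper function is illustrative.

```cpp
// Hedged usage sketch (assumes only the Bitmap<N> API added by this patch):
// the sentinel values exercised throughout the fixture above.
#include <cassert>

#include "tcmalloc/internal/range_tracker.h"

void BitmapFindSketch() {
  tcmalloc::Bitmap<64> bits;
  bits.SetBit(10);
  assert(bits.FindSet(0) == 10);           // first set bit at or after 0
  assert(bits.FindSet(11) == 64);          // none left: returns N
  assert(bits.FindSetBackwards(9) == -1);  // none at or before 9: returns -1
  assert(bits.FindClear(10) == 11);        // first clear bit at or after 10
}
```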
+ size_t start = absl::Uniform(rng, 0u, kBits); + size_t length = absl::Uniform(rng, 0u, kBits - start); + + size_t expected = 0; + for (int j = 0; j < length; j++) { + if (truth[start + j]) { + expected++; + } + } + + EXPECT_EQ(expected, map.CountBits(start, length)); + } +} + +class RangeTrackerTest : public ::testing::Test { + protected: + std::vector> FreeRanges() { + std::vector> ret; + size_t index = 0, len; + while (range_.NextFreeRange(index, &index, &len)) { + ret.push_back({index, len}); + index += len; + } + return ret; + } + static const size_t kBits = 1017; + RangeTracker range_; +}; + +const size_t RangeTrackerTest::kBits; + +TEST_F(RangeTrackerTest, Trivial) { + EXPECT_EQ(kBits, range_.size()); + EXPECT_EQ(0, range_.used()); + EXPECT_EQ(kBits, range_.longest_free()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, kBits))); + ASSERT_EQ(0, range_.FindAndMark(kBits)); + EXPECT_EQ(0, range_.longest_free()); + EXPECT_EQ(kBits, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre()); + range_.Unmark(0, 100); + EXPECT_EQ(100, range_.longest_free()); + EXPECT_EQ(kBits - 100, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 100))); + // non-contiguous - shouldn't increase longest + range_.Unmark(200, 100); + EXPECT_EQ(100, range_.longest_free()); + EXPECT_EQ(kBits - 200, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 100), Pair(200, 100))); + range_.Unmark(100, 100); + EXPECT_EQ(300, range_.longest_free()); + EXPECT_EQ(kBits - 300, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 300))); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/internal/util.cc b/tcmalloc/internal/util.cc new file mode 100644 index 000000000..23be28924 --- /dev/null +++ b/tcmalloc/internal/util.cc @@ -0,0 +1,216 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/internal/util.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +int signal_safe_open(const char *path, int flags, ...) { + int fd; + va_list ap; + + va_start(ap, flags); + mode_t mode = va_arg(ap, mode_t); + va_end(ap); + + do { + fd = ((flags & O_CREAT) ? 
open(path, flags, mode) : open(path, flags)); + } while (fd == -1 && errno == EINTR); + + return fd; +} + +int signal_safe_close(int fd) { + int rc; + + do { + rc = close(fd); + } while (rc == -1 && errno == EINTR); + + return rc; +} + +ssize_t signal_safe_write(int fd, const char *buf, size_t count, + size_t *bytes_written) { + ssize_t rc; + size_t total_bytes = 0; + + do { + rc = write(fd, buf + total_bytes, count - total_bytes); + if (rc > 0) + total_bytes += rc; + } while ((rc > 0 && count > total_bytes ) || + (rc == -1 && errno == EINTR)); + + if (bytes_written != nullptr) *bytes_written = total_bytes; + + return rc; +} + +int signal_safe_poll(struct pollfd *fds, int nfds, int timeout_ms) { + int rc = 0; + int elapsed_ms = 0; + + // We can't use gettimeofday since it's not async signal safe. We could use + // clock_gettime but that would require linking //base against librt. + // Fortunately, timeout is of sufficiently coarse granularity that we can just + // approximate it. + while ((elapsed_ms <= timeout_ms || timeout_ms == -1) && (rc == 0)) { + if (elapsed_ms++ > 0) ::absl::SleepFor(::absl::Milliseconds(1)); + while ((rc = poll(fds, nfds, 0)) == -1 && errno == EINTR) {} + } + + return rc; +} + + +ssize_t signal_safe_read(int fd, char *buf, size_t count, size_t *bytes_read) { + ssize_t rc; + size_t total_bytes = 0; + struct pollfd pfd; + + // poll is required for testing whether there is any data left on fd in the + // case of a signal interrupting a partial read. This is needed since this + // case is only defined to return the number of bytes read up to that point, + // with no indication whether more could have been read (up to count). + pfd.fd = fd; + pfd.events = POLL_IN; + pfd.revents = 0; + + do { + rc = read(fd, buf + total_bytes, count - total_bytes); + if (rc > 0) + total_bytes += rc; + + if (rc == 0) + break; // EOF + // try again if there's space to fill, no (non-interrupt) error, + // and data is available. + } while (total_bytes < count && (rc > 0 || errno == EINTR) && + (signal_safe_poll(&pfd, 1, 0) == 1 || total_bytes == 0)); + + if (bytes_read) + *bytes_read = total_bytes; + + if (rc != -1 || errno == EINTR) + rc = total_bytes; // return the cumulative bytes read + return rc; +} + +// POSIX provides the **environ array which contains environment variables in a +// linear array, terminated by a NULL string. This array is only perturbed when +// the environment is changed (which is inherently unsafe) so it's safe to +// return a const pointer into it. +// e.g. { "SHELL=/bin/bash", "MY_ENV_VAR=1", "" } +extern "C" char **environ; +const char* thread_safe_getenv(const char *env_var) { + int var_len = strlen(env_var); + + char **envv = environ; + if (!envv) { + return nullptr; + } + + for (; *envv != nullptr; envv++) + if (strncmp(*envv, env_var, var_len) == 0 && (*envv)[var_len] == '=') + return *envv + var_len + 1; // skip over the '=' + + return nullptr; +} + +std::vector AllowedCpus() { + // We have no need for dynamically sized sets (currently >1024 CPUs for glibc) + // at the present time. We could change this in the future. 
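An aside before the affinity helpers continue: a hedged sketch of how the signal-safe wrappers above compose to read a small file. The function name, path handling, and buffer management are illustrative only, not taken from this patch.

```cpp
// Hypothetical usage of signal_safe_open/read/close; the helper name is
// illustrative. The wrappers retry on EINTR and leave errno intact on error.
#include <fcntl.h>
#include <sys/types.h>

#include <cstddef>

#include "tcmalloc/internal/util.h"

bool ReadSmallFile(const char* path, char* buf, size_t len, size_t* n_read) {
  namespace ti = tcmalloc::tcmalloc_internal;
  int fd = ti::signal_safe_open(path, O_RDONLY);
  if (fd < 0) return false;                        // open failed, errno set
  ssize_t rc = ti::signal_safe_read(fd, buf, len, n_read);
  ti::signal_safe_close(fd);                       // retries on EINTR
  return rc >= 0;                                  // *n_read holds bytes read
}
```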
+ cpu_set_t allowed_cpus; + CHECK_CONDITION(sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) == + 0); + int n = CPU_COUNT(&allowed_cpus), c = 0; + + std::vector result(n); + for (int i = 0; i < CPU_SETSIZE && n; i++) { + if (CPU_ISSET(i, &allowed_cpus)) { + result[c++] = i; + n--; + } + } + CHECK_CONDITION(0 == n); + + return result; +} + +static cpu_set_t SpanToCpuSetT(absl::Span mask) { + cpu_set_t result; + CPU_ZERO(&result); + for (int cpu : mask) { + CPU_SET(cpu, &result); + } + return result; +} + +ScopedAffinityMask::ScopedAffinityMask(absl::Span allowed_cpus) { + specified_cpus_ = SpanToCpuSetT(allowed_cpus); + // getaffinity should never fail. + CHECK_CONDITION( + sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0); + // See destructor comments on setaffinity interactions. Tampered() will + // necessarily be true in this case. + sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_); +} + +ScopedAffinityMask::ScopedAffinityMask(int allowed_cpu) { + CPU_ZERO(&specified_cpus_); + CPU_SET(allowed_cpu, &specified_cpus_); + + // getaffinity should never fail. + CHECK_CONDITION( + sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0); + // See destructor comments on setaffinity interactions. Tampered() will + // necessarily be true in this case. + sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_); +} + +ScopedAffinityMask::~ScopedAffinityMask() { + // If something else has already reset our affinity, do not attempt to + // restrict towards our original mask. This is best-effort as the tampering + // may obviously occur during the destruction of *this. + if (!Tampered()) { + // Note: We do not assert success here, conflicts may restrict us from all + // 'original_cpus_'. + sched_setaffinity(0, sizeof(original_cpus_), &original_cpus_); + } +} + +bool ScopedAffinityMask::Tampered() { + cpu_set_t current_cpus; + CHECK_CONDITION(sched_getaffinity(0, sizeof(current_cpus), ¤t_cpus) == + 0); + return !CPU_EQUAL(¤t_cpus, &specified_cpus_); // Mismatch => modified. +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/tcmalloc/internal/util.h b/tcmalloc/internal/util.h new file mode 100644 index 000000000..f53650ae1 --- /dev/null +++ b/tcmalloc/internal/util.h @@ -0,0 +1,141 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_UTIL_H_ +#define TCMALLOC_INTERNAL_UTIL_H_ + +#include // IWYU pragma: keep +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "absl/base/internal/sysinfo.h" +#include "absl/time/time.h" +#include "absl/types/span.h" + +// Useful internal utility functions. These calls are async-signal safe +// provided the signal handler saves errno at entry and restores it before +// return. 
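Before the declarations, a hedged, test-only sketch of how AllowedCpus() and ScopedAffinityMask (implemented in util.cc just above) are meant to be combined; the surrounding function is hypothetical.

```cpp
// Hypothetical test-only sketch: pin the current thread to one allowed CPU
// for the duration of a scope, then check that nothing external changed it.
#include <vector>

#include "tcmalloc/internal/util.h"

void RunPinnedToFirstAllowedCpu() {
  using tcmalloc::tcmalloc_internal::AllowedCpus;
  using tcmalloc::tcmalloc_internal::ScopedAffinityMask;

  std::vector<int> cpus = AllowedCpus();   // CPUs we may currently run on
  ScopedAffinityMask mask(cpus[0]);        // restrict to the first one
  // ... run the CPU-sensitive portion of a test here ...
  if (mask.Tampered()) {
    // An external actor changed our affinity; results may not be meaningful.
  }
}  // Original affinity is restored here unless the mask was tampered with.
```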
+namespace tcmalloc { +namespace tcmalloc_internal { + +// signal_safe_open() - a wrapper for open(2) which ignores signals +// Semantics equivalent to open(2): +// returns a file-descriptor (>=0) on success, -1 on failure, error in errno +int signal_safe_open(const char *path, int flags, ...); + +// signal_safe_close() - a wrapper for close(2) which ignores signals +// Semantics equivalent to close(2): +// returns 0 on success, -1 on failure, error in errno +int signal_safe_close(int fd); + +// signal_safe_write() - a wrapper for write(2) which ignores signals +// Semantics equivalent to write(2): +// returns number of bytes written, -1 on failure, error in errno +// additionally, (if not NULL) total bytes written in *bytes_written +// +// In the interrupted (EINTR) case, signal_safe_write will continue attempting +// to write out buf. This means that in the: +// write->interrupted by signal->write->error case +// That it is possible for signal_safe_write to return -1 when there were bytes +// flushed from the buffer in the first write. To handle this case the optional +// bytes_written parameter is provided, when not-NULL, it will always return the +// total bytes written before any error. +ssize_t signal_safe_write(int fd, const char *buf, size_t count, + size_t *bytes_written); + +// signal_safe_read() - a wrapper for read(2) which ignores signals +// Semantics equivalent to read(2): +// returns number of bytes written, -1 on failure, error in errno +// additionally, (if not NULL) total bytes written in *bytes_written +// +// In the interrupted (EINTR) case, signal_safe_read will continue attempting +// to read into buf. This means that in the: +// read->interrupted by signal->read->error case +// That it is possible for signal_safe_read to return -1 when there were bytes +// read by a previous read. To handle this case the optional bytes_written +// parameter is provided, when not-NULL, it will always return the total bytes +// read before any error. +ssize_t signal_safe_read(int fd, char *buf, size_t count, size_t *bytes_read); + +// signal_safe_poll() - a wrapper for poll(2) which ignores signals +// Semantics equivalent to poll(2): +// Returns number of structures with non-zero revent fields. +// +// In the interrupted (EINTR) case, signal_safe_poll will continue attempting to +// poll for data. Unlike ppoll/pselect, signal_safe_poll is *ignoring* signals +// not attempting to re-enable them. Protecting us from the traditional races +// involved with the latter. +int signal_safe_poll(struct ::pollfd *fds, int nfds, int timeout_ms); + +// WARNING ******************************************************************** +// getenv(2) can only be safely used in the absence of calls which perturb the +// environment (e.g. putenv/setenv/clearenv). The use of such calls is +// strictly thread-hostile since these calls do *NOT* synchronize and there is +// *NO* thread-safe way in which the POSIX **environ array may be queried about +// modification. +// **************************************************************************** +// The default getenv(2) is not guaranteed to be thread-safe as there are no +// semantics specifying the implementation of the result buffer. The result +// from thread_safe_getenv() may be safely queried in a multi-threaded context. +// If you have explicit synchronization with changes environment variables then +// any copies of the returned pointer must be invalidated across modification. 
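A hedged usage sketch for the accessor declared immediately below; the environment variable name is invented for illustration and is not consulted by TCMalloc.

```cpp
// Hypothetical caller of thread_safe_getenv(); "MY_FEATURE_FLAG" is an
// illustrative variable name only.
#include <cstring>

#include "tcmalloc/internal/util.h"

bool FeatureFlagEnabled() {
  const char* v =
      tcmalloc::tcmalloc_internal::thread_safe_getenv("MY_FEATURE_FLAG");
  // The returned pointer aliases **environ; treat it as read-only and do not
  // cache it across anything that might modify the environment.
  return v != nullptr && strcmp(v, "1") == 0;
}
```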
+const char* thread_safe_getenv(const char *env_var); + +// Affinity helpers. + +// Returns a vector of the which cpus the currently allowed thread is allowed to +// run on. There are no guarantees that this will not change before, after, or +// even during, the call to AllowedCpus(). +std::vector AllowedCpus(); + +// Enacts a scoped affinity mask on the constructing thread. Attempts to +// restore the original affinity mask on destruction. +// +// REQUIRES: For test-use only. Do not use this in production code. +class ScopedAffinityMask { + public: + // When racing with an external restriction that has a zero-intersection with + // "allowed_cpus" we will construct, but immediately register as "Tampered()", + // without actual changes to affinity. + explicit ScopedAffinityMask(absl::Span allowed_cpus); + explicit ScopedAffinityMask(int allowed_cpu); + + // Restores original affinity iff our scoped affinity has not been externally + // modified (i.e. Tampered()). Otherwise, the updated affinity is preserved. + ~ScopedAffinityMask(); + + // Returns true if the affinity mask no longer matches what was set at point + // of construction. + // + // Note: This is instantaneous and not fool-proof. It's possible for an + // external affinity modification to subsequently align with our originally + // specified "allowed_cpus". In this case Tampered() will return false when + // time may have been spent executing previously on non-specified cpus. + bool Tampered(); + + private: + cpu_set_t original_cpus_, specified_cpus_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_UTIL_H_ diff --git a/tcmalloc/internal_malloc_extension.h b/tcmalloc/internal_malloc_extension.h new file mode 100644 index 000000000..9f44ca858 --- /dev/null +++ b/tcmalloc/internal_malloc_extension.h @@ -0,0 +1,93 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. + +#ifndef TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ +#define TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ + +#include + +#include "absl/base/attributes.h" +#include "absl/functional/function_ref.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +// AllocationProfilingTokenAccessor and ProfileAccessor provide access to the +// private constructors of AllocationProfilingToken and Profile that take a +// pointer. 
+class AllocationProfilingTokenAccessor { + public: + static MallocExtension::AllocationProfilingToken MakeToken( + std::unique_ptr p) { + return MallocExtension::AllocationProfilingToken(std::move(p)); + } +}; + +class ProfileAccessor { + public: + static Profile MakeProfile(std::unique_ptr p) { + return Profile(std::move(p)); + } +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#if ABSL_HAVE_ATTRIBUTE_WEAK && !defined(__APPLE__) && !defined(__EMSCRIPTEN__) + +extern "C" { + +ABSL_ATTRIBUTE_WEAK tcmalloc::AddressRegionFactory* +TCMalloc_Internal_GetRegionFactory(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetRegionFactory( + tcmalloc::AddressRegionFactory* factory); + +ABSL_ATTRIBUTE_WEAK const tcmalloc::tcmalloc_internal::ProfileBase* +TCMalloc_Internal_SnapshotCurrent(tcmalloc::ProfileType type); + +ABSL_ATTRIBUTE_WEAK tcmalloc::tcmalloc_internal::AllocationProfilingTokenBase* +TCMalloc_Internal_StartAllocationProfiling(); + +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ActivateGuardedSampling(); +ABSL_ATTRIBUTE_WEAK tcmalloc::MallocExtension::Ownership +MallocExtension_Internal_GetOwnership(const void* ptr); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( + tcmalloc::MallocExtension::MemoryLimit* limit); +ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); +ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetProperties( + std::map* ret); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetStats(std::string* ret); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxPerCpuCacheSize( + int32_t value); +ABSL_ATTRIBUTE_WEAK size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ReleaseMemoryToSystem( + size_t bytes); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMemoryLimit( + const tcmalloc::MallocExtension::MemoryLimit* limit); + +ABSL_ATTRIBUTE_WEAK size_t MallocExtension_GetAllocatedSize(const void* ptr); +ABSL_ATTRIBUTE_WEAK void MallocExtension_MarkThreadBusy(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_MarkThreadIdle(); +} + +#endif + +#endif // TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ diff --git a/tcmalloc/libc_override.h b/tcmalloc/libc_override.h new file mode 100644 index 000000000..4284696e8 --- /dev/null +++ b/tcmalloc/libc_override.h @@ -0,0 +1,54 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This .h file imports the code that causes tcmalloc to override libc +// versions of malloc/free/new/delete/etc. That is, it provides the +// logic that makes it so calls to malloc(10) go through tcmalloc, +// rather than the default (libc) malloc. +// +// This file also provides a method: ReplaceSystemAlloc(), that every +// libc_override_*.h file it #includes is required to provide. 
This +// is called when first setting up tcmalloc -- that is, when a global +// constructor in tcmalloc.cc is executed -- to do any initialization +// work that may be required for this OS. (Note we cannot entirely +// control when tcmalloc is initialized, and the system may do some +// mallocs and frees before this routine is called.) It may be a +// noop. +// +// Every libc has its own way of doing this, and sometimes the compiler +// matters too, so we have a different file for each libc, and often +// for different compilers and OS's. + +#ifndef TCMALLOC_LIBC_OVERRIDE_H_ +#define TCMALLOC_LIBC_OVERRIDE_H_ + +#include + +#include "tcmalloc/tcmalloc.h" + +static void ReplaceSystemAlloc(); // defined in the .h files below + +#if defined(OS_WINDOWS) +// We don't do any overriding on windows. Just provide a dummy function. +static void ReplaceSystemAlloc() { } + +#elif defined(__GLIBC__) +#include "tcmalloc/libc_override_glibc.h" + +#else +#error Need to add support for your libc/OS here + +#endif + +#endif // TCMALLOC_LIBC_OVERRIDE_H_ diff --git a/tcmalloc/libc_override_gcc_and_weak.h b/tcmalloc/libc_override_gcc_and_weak.h new file mode 100644 index 000000000..66fb75ff9 --- /dev/null +++ b/tcmalloc/libc_override_gcc_and_weak.h @@ -0,0 +1,117 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Used to override malloc routines on systems that define the +// memory allocation routines to be weak symbols in their libc +// (almost all unix-based systems are like this), on gcc, which +// suppports the 'alias' attribute. + +#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ + +#include +#include + +#include + +#include "tcmalloc/tcmalloc.h" + +#ifndef __GNUC__ +# error libc_override_gcc_and_weak.h is for gcc distributions only. +#endif + +// visibility("default") ensures that these symbols are always exported, even +// with -fvisibility=hidden. 
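A minimal standalone sketch of the aliasing technique that TCMALLOC_ALIAS (defined next) applies to every operator and C entry point below. The function names here are hypothetical; only the attribute usage mirrors this file.

```cpp
// Hypothetical, self-contained demo of __attribute__((alias)): both symbols
// resolve to the same function body, and visibility("default") keeps the
// alias exported even when building with -fvisibility=hidden.
#include <cstddef>
#include <cstdlib>

extern "C" void* my_malloc_impl(size_t size) { return std::malloc(size); }

extern "C" void* my_malloc(size_t size)
    __attribute__((alias("my_malloc_impl"), visibility("default")));
```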
+#define TCMALLOC_ALIAS(tc_fn) \ + __attribute__((alias(#tc_fn), visibility("default"))) + +void* operator new(size_t size) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNew); +void operator delete(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalDelete); +void operator delete(void* p, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteSized); +void* operator new[](size_t size) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewArray); +void operator delete[](void* p) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArray); +void operator delete[](void* p, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArraySized); +void* operator new(size_t size, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewNothrow); +void* operator new[](size_t size, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewArrayNothrow); +void operator delete(void* p, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteNothrow); +void operator delete[](void* p, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayNothrow); + +void* operator new(size_t size, std::align_val_t alignment) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewAligned); +void* operator new(size_t size, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewAligned_nothrow); +void operator delete(void* p, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteAligned); +void operator delete(void* p, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteAligned_nothrow); +void operator delete(void* p, size_t size, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned); +void* operator new[](size_t size, std::align_val_t alignment) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned); +void* operator new[](size_t size, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned_nothrow); +void operator delete[](void* p, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned); +void operator delete[](void* p, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned_nothrow); +void operator delete[](void* p, size_t size, + std::align_val_t alignemnt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArraySizedAligned); + +extern "C" { +void* malloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalMalloc); +void free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree); +void sdallocx(void* ptr, size_t size, int flags) noexcept + TCMALLOC_ALIAS(TCMallocInternalSdallocx); +void* realloc(void* ptr, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalRealloc); +void* calloc(size_t n, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalCalloc); +void cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree); +void* memalign(size_t align, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalMemalign); +void* aligned_alloc(size_t align, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalAlignedAlloc); +void* valloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalValloc); +void* pvalloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalPvalloc); +int posix_memalign(void** r, size_t a, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); +void malloc_stats(void) noexcept 
TCMALLOC_ALIAS(TCMallocInternalMallocStats); +int mallopt(int cmd, int value) noexcept + TCMALLOC_ALIAS(TCMallocInternalMallOpt); +struct mallinfo mallinfo(void) noexcept + TCMALLOC_ALIAS(TCMallocInternalMallocInfo); +size_t malloc_size(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocSize); +size_t malloc_usable_size(void* p) noexcept + TCMALLOC_ALIAS(TCMallocInternalMallocSize); +} // extern "C" + +// No need to do anything at tcmalloc-registration time: we do it all +// via overriding weak symbols (at link time). +static void ReplaceSystemAlloc() { } + +#endif // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ diff --git a/tcmalloc/libc_override_glibc.h b/tcmalloc/libc_override_glibc.h new file mode 100644 index 000000000..c6fc626bb --- /dev/null +++ b/tcmalloc/libc_override_glibc.h @@ -0,0 +1,125 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Used to override malloc routines on systems that are using glibc. + +#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ + +#include +#include + +#include "tcmalloc/tcmalloc.h" + +#ifndef __GLIBC__ +# error libc_override_glibc.h is for glibc distributions only. +#endif + +// In glibc, the memory-allocation methods are weak symbols, so we can +// just override them with our own. If we're using gcc, we can use +// __attribute__((alias)) to do the overriding easily (exception: +// Mach-O, which doesn't support aliases). Otherwise we have to use a +// function call. +#if !defined(__GNUC__) || defined(__MACH__) + +// This also defines ReplaceSystemAlloc(). +#include "libc_override_redefine.h" + +#else // #if !defined(__GNUC__) || defined(__MACH__) + +// If we get here, we're a gcc system, so do all the overriding we do +// with gcc. This does the overriding of all the 'normal' memory +// allocation. This also defines ReplaceSystemAlloc(). +# include "libc_override_gcc_and_weak.h" + +// We also have to do some glibc-specific overriding. Some library +// routines on RedHat 9 allocate memory using malloc() and free it +// using __libc_free() (or vice-versa). Since we provide our own +// implementations of malloc/free, we need to make sure that the +// __libc_XXX variants (defined as part of glibc) also point to the +// same implementations. Since it only matters for redhat, we +// do it inside the gcc #ifdef, since redhat uses gcc. +// TODO(b/134690953): only do this if we detect we're an old enough glibc? 
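As background for the overriding strategy described above: glibc's allocation entry points are weak, so a strong definition linked into the program takes precedence. The same weak-declaration trick also lets code probe for optional symbols before calling them, as sketched below with hypothetical names.

```cpp
// Hypothetical sketch of the weak-symbol pattern: declare the symbol weak,
// then test its address before calling, so the program links and runs even
// when no strong definition is present.
extern "C" __attribute__((weak)) int OptionalHook();  // may be undefined

int CallHookOrDefault() {
  if (&OptionalHook != nullptr) {  // a strong definition was linked in
    return OptionalHook();
  }
  return -1;                       // fall back when the hook is absent
}
```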
+ +extern "C" { +void* __libc_malloc(size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalMalloc); +void __libc_free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree); +void* __libc_realloc(void* ptr, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalRealloc); +void* __libc_calloc(size_t n, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalCalloc); +void __libc_cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree); +void* __libc_memalign(size_t align, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalMemalign); +void* __libc_valloc(size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalValloc); +void* __libc_pvalloc(size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalPvalloc); +int __posix_memalign(void** r, size_t a, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); +} // extern "C" + +#endif // #if defined(__GNUC__) && !defined(__MACH__) + + +// We also have to hook libc malloc. While our work with weak symbols +// should make sure libc malloc is never called in most situations, it +// can be worked around by shared libraries with the DEEPBIND +// environment variable set. The below hooks libc to call our malloc +// routines even in that situation. In other situations, this hook +// should never be called. +extern "C" { +static void* glibc_override_malloc(size_t size, const void *caller) { + return TCMallocInternalMalloc(size); +} +static void* glibc_override_realloc(void *ptr, size_t size, + const void *caller) { + return TCMallocInternalRealloc(ptr, size); +} +static void glibc_override_free(void *ptr, const void *caller) { + TCMallocInternalFree(ptr); +} +static void* glibc_override_memalign(size_t align, size_t size, + const void *caller) { + return TCMallocInternalMemalign(align, size); +} + +// We should be using __malloc_initialize_hook here. (See +// http://swoolley.org/man.cgi/3/malloc_hook.) However, this causes weird +// linker errors with programs that link with -static, so instead we just assign +// the vars directly at static-constructor time. That should serve the same +// effect of making sure the hooks are set before the first malloc call the +// program makes. + +// Glibc-2.14 and above make __malloc_hook and friends volatile +#ifndef __MALLOC_HOOK_VOLATILE +#define __MALLOC_HOOK_VOLATILE /**/ +#endif + +void* (* __MALLOC_HOOK_VOLATILE __malloc_hook)(size_t, const void*) = + &glibc_override_malloc; +void* (* __MALLOC_HOOK_VOLATILE __realloc_hook)(void*, size_t, const void*) = + &glibc_override_realloc; +void (* __MALLOC_HOOK_VOLATILE __free_hook)(void*, const void*) = + &glibc_override_free; +void* (* __MALLOC_HOOK_VOLATILE __memalign_hook)(size_t,size_t, const void*) = + &glibc_override_memalign; + +} // extern "C" + +// No need to write ReplaceSystemAlloc(); one of the #includes above +// did it for us. + +#endif // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ diff --git a/tcmalloc/libc_override_redefine.h b/tcmalloc/libc_override_redefine.h new file mode 100644 index 000000000..60549dda7 --- /dev/null +++ b/tcmalloc/libc_override_redefine.h @@ -0,0 +1,87 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Used on systems that don't have their own definition of +// malloc/new/etc. (Typically this will be a windows msvcrt.dll that +// has been edited to remove the definitions.) We can just define our +// own as normal functions. +// +// This should also work on systems were all the malloc routines are +// defined as weak symbols, and there's no support for aliasing. + +#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ +#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ + +#include + +#include +#include + +#include "tcmalloc/tcmalloc.h" + +void* operator new(size_t size) { return TCMallocInternalNew(size); } +void operator delete(void* p) noexcept { TCMallocInternalDelete(p); } +void* operator new[](size_t size) { return TCMallocInternalNewArray(size); } +void operator delete[](void* p) noexcept { TCMallocInternalDeleteArray(p); } +void* operator new(size_t size, const std::nothrow_t& nt) noexcept { + return TCMallocInternalNewNothrow(size, nt); +} +void* operator new[](size_t size, const std::nothrow_t& nt) noexcept { + return TCMallocInternalNewArrayNothrow(size, nt); +} +void operator delete(void* ptr, const std::nothrow_t& nt) noexcept { + return TCMallocInternalDeleteNothrow(ptr, nt); +} +void operator delete[](void* ptr, const std::nothrow_t& nt) noexcept { + return TCMallocInternalDeleteArrayNothrow(ptr, nt); +} +extern "C" { +void* malloc(size_t s) noexcept { return TCMallocInternalMalloc(s); } +void free(void* p) noexcept { TCMallocInternalFree(p); } +void sdallocx(void* p, size_t s, int flags) { + TCMallocInternalSdallocx(p, s, flags); +} +void* realloc(void* p, size_t s) noexcept { + return TCMallocInternalRealloc(p, s); +} +void* calloc(size_t n, size_t s) noexcept { + return TCMallocInternalCalloc(n, s); +} +void cfree(void* p) noexcept { TCMallocInternalCfree(p); } +void* memalign(size_t a, size_t s) noexcept { + return TCMallocInternalMemalign(a, s); +} +void* valloc(size_t s) noexcept { return TCMallocInternalValloc(s); } +void* pvalloc(size_t s) noexcept { return TCMallocInternalPvalloc(s); } +int posix_memalign(void** r, size_t a, size_t s) noexcept { + return TCMallocInternalPosixMemalign(r, a, s); +} +void malloc_stats(void) noexcept { TCMallocInternalMallocStats(); } +int mallopt(int cmd, int v) noexcept { return TCMallocInternalMallOpt(cmd, v); } +#ifdef HAVE_STRUCT_MALLINFO +struct mallinfo mallinfo(void) noexcept { + return TCMallocInternalMallocInfo(); +} +#endif +size_t malloc_size(void* p) noexcept { return TCMallocInternalMallocSize(p); } +size_t malloc_usable_size(void* p) noexcept { + return TCMallocInternalMallocSize(p); +} +} // extern "C" + +// No need to do anything at tcmalloc-registration time: we do it all +// via overriding weak symbols (at link time). 
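A hedged sketch of how ReplaceSystemAlloc() (the no-op defined just below, or the glibc variant above) is typically invoked: from a global constructor in the allocator. The initializer type and variable below are illustrative, not the actual tcmalloc.cc code.

```cpp
// Hypothetical driver for ReplaceSystemAlloc(): run once from a global
// constructor so any OS-specific setup happens before main().
#include "tcmalloc/libc_override.h"

namespace {
struct SystemAllocReplacer {
  SystemAllocReplacer() { ReplaceSystemAlloc(); }
};
// Static-storage object; its constructor runs during program startup.
SystemAllocReplacer replace_system_alloc_now;
}  // namespace
```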
+static void ReplaceSystemAlloc() { } + +#endif // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ diff --git a/tcmalloc/malloc_extension.cc b/tcmalloc/malloc_extension.cc new file mode 100644 index 000000000..007fc4344 --- /dev/null +++ b/tcmalloc/malloc_extension.cc @@ -0,0 +1,451 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/malloc_extension.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/low_level_alloc.h" +#include "absl/memory/memory.h" +#include "tcmalloc/internal_malloc_extension.h" + +namespace tcmalloc { + +MallocExtension::AllocationProfilingToken::AllocationProfilingToken( + std::unique_ptr impl) + : impl_(std::move(impl)) {} + +MallocExtension::AllocationProfilingToken::~AllocationProfilingToken() {} + +Profile MallocExtension::AllocationProfilingToken::Stop() && { + std::unique_ptr p( + std::move(impl_)); + if (!p) { + return Profile(); + } + return std::move(*p).Stop(); +} + +Profile::Profile(std::unique_ptr impl) + : impl_(std::move(impl)) {} + +Profile::~Profile() {} + +void Profile::Iterate(absl::FunctionRef f) const { + if (!impl_) { + return; + } + + impl_->Iterate(f); +} + +int64_t Profile::Period() const { + if (!impl_) { + return -1; + } + + return impl_->Period(); +} + +ProfileType Profile::Type() const { + if (!impl_) { + return ProfileType::kDoNotUse; + } + + return impl_->Type(); +} + +AddressRegion::~AddressRegion() {} + +AddressRegionFactory::~AddressRegionFactory() {} + +size_t AddressRegionFactory::GetStats(absl::Span buffer) { + static_cast(buffer); + return 0; +} + +size_t AddressRegionFactory::GetStatsInPbtxt(absl::Span buffer) { + static_cast(buffer); + return 0; +} + +static std::atomic address_region_factory_internal_bytes_allocated(0); + +size_t AddressRegionFactory::InternalBytesAllocated() { + return address_region_factory_internal_bytes_allocated.load( + std::memory_order_relaxed); +} + +void* AddressRegionFactory::MallocInternal(size_t size) { + // Use arena without malloc hooks to avoid HeapChecker reporting a leak. 
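Stepping aside from this file for a moment: a hypothetical caller of the Profile interface whose plumbing was just defined above (Iterate, Period, Type). Only the wrapper function is invented here.

```cpp
// Hypothetical usage: total up the bytes reported by a Profile via the
// Iterate/Sample interface implemented above.
#include <cstdint>

#include "tcmalloc/malloc_extension.h"

int64_t TotalSampledBytes(const tcmalloc::Profile& profile) {
  int64_t total = 0;
  profile.Iterate(
      [&](const tcmalloc::Profile::Sample& s) { total += s.sum; });
  return total;
}
```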
+ static auto* arena = + absl::base_internal::LowLevelAlloc::NewArena(/*flags=*/0); + void* result = + absl::base_internal::LowLevelAlloc::AllocWithArena(size, arena); + if (result) { + address_region_factory_internal_bytes_allocated.fetch_add( + size, std::memory_order_relaxed); + } + return result; +} + +#if !ABSL_HAVE_ATTRIBUTE_WEAK || defined(__APPLE__) || defined(__EMSCRIPTEN__) +#define ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS 0 +#else +#define ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS 1 + +extern "C" { + +ABSL_ATTRIBUTE_WEAK int64_t TCMalloc_GetProfileSamplingRate(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_SetProfileSamplingRate(int64_t); + +ABSL_ATTRIBUTE_WEAK int64_t TCMalloc_GetGuardedSamplingRate(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_SetGuardedSamplingRate(int64_t); + +ABSL_ATTRIBUTE_WEAK int64_t TCMalloc_GetMaxTotalThreadCacheBytes(); +ABSL_ATTRIBUTE_WEAK void TCMalloc_SetMaxTotalThreadCacheBytes(int64_t value); + +ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( + const char* name_data, size_t name_size, size_t* value); +} +#endif + +std::string MallocExtension::GetStats() { + std::string ret; +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetStats != nullptr) { + MallocExtension_Internal_GetStats(&ret); + } +#endif + return ret; +} + +void MallocExtension::ReleaseMemoryToSystem(size_t num_bytes) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_ReleaseMemoryToSystem != nullptr) { + MallocExtension_Internal_ReleaseMemoryToSystem(num_bytes); + } +#endif +} + +AddressRegionFactory* MallocExtension::GetRegionFactory() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&TCMalloc_Internal_GetRegionFactory == nullptr) { + return nullptr; + } + + return TCMalloc_Internal_GetRegionFactory(); +#else + return nullptr; +#endif +} + +void MallocExtension::SetRegionFactory(AddressRegionFactory* factory) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&TCMalloc_Internal_SetRegionFactory == nullptr) { + return; + } + + TCMalloc_Internal_SetRegionFactory(factory); +#endif + // Default implementation does nothing +} + +Profile MallocExtension::SnapshotCurrent(tcmalloc::ProfileType type) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&TCMalloc_Internal_SnapshotCurrent == nullptr) { + return Profile(); + } + + return tcmalloc_internal::ProfileAccessor::MakeProfile( + std::unique_ptr( + TCMalloc_Internal_SnapshotCurrent(type))); +#else + return Profile(); +#endif +} + +MallocExtension::AllocationProfilingToken +MallocExtension::StartAllocationProfiling() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&TCMalloc_Internal_StartAllocationProfiling == nullptr) { + return {}; + } + + return tcmalloc_internal::AllocationProfilingTokenAccessor::MakeToken( + std::unique_ptr( + TCMalloc_Internal_StartAllocationProfiling())); +#else + return {}; +#endif +} + +void MallocExtension::MarkThreadIdle() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_MarkThreadIdle == nullptr) { + return; + } + + MallocExtension_MarkThreadIdle(); +#endif +} + +void MallocExtension::MarkThreadBusy() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_MarkThreadBusy == nullptr) { + return; + } + + MallocExtension_MarkThreadBusy(); +#endif +} + +MallocExtension::MemoryLimit MallocExtension::GetMemoryLimit() { + MemoryLimit ret; +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetMemoryLimit != nullptr) { + 
MallocExtension_Internal_GetMemoryLimit(&ret); + } +#endif + return ret; +} + +void MallocExtension::SetMemoryLimit( + const MallocExtension::MemoryLimit& limit) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_SetMemoryLimit != nullptr) { + MallocExtension_Internal_SetMemoryLimit(&limit); + } +#endif +} + +int64_t MallocExtension::GetProfileSamplingRate() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&TCMalloc_GetProfileSamplingRate != nullptr) { + return TCMalloc_GetProfileSamplingRate(); + } +#endif + return -1; +} + +void MallocExtension::SetProfileSamplingRate(int64_t rate) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&TCMalloc_SetProfileSamplingRate != nullptr) { + TCMalloc_SetProfileSamplingRate(rate); + } +#endif + (void) rate; +} + +int64_t MallocExtension::GetGuardedSamplingRate() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (TCMalloc_GetGuardedSamplingRate == nullptr) { + return -1; + } + + return TCMalloc_GetGuardedSamplingRate(); +#else + return -1; +#endif +} + +void MallocExtension::SetGuardedSamplingRate(int64_t rate) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (TCMalloc_SetGuardedSamplingRate == nullptr) { + return; + } + + TCMalloc_SetGuardedSamplingRate(rate); +#else + (void) rate; +#endif +} + +void MallocExtension::ActivateGuardedSampling() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_ActivateGuardedSampling != nullptr) { + MallocExtension_Internal_ActivateGuardedSampling(); + } +#endif +} + +bool MallocExtension::PerCpuCachesActive() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetPerCpuCachesActive == nullptr) { + return false; + } + + return MallocExtension_Internal_GetPerCpuCachesActive(); +#else + return false; +#endif +} + +int32_t MallocExtension::GetMaxPerCpuCacheSize() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { + return -1; + } + + return MallocExtension_Internal_GetMaxPerCpuCacheSize(); +#else + return -1; +#endif +} + +void MallocExtension::SetMaxPerCpuCacheSize(int32_t value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetMaxPerCpuCacheSize == nullptr) { + return; + } + + MallocExtension_Internal_SetMaxPerCpuCacheSize(value); +#else + (void) value; +#endif +} + +int64_t MallocExtension::GetMaxTotalThreadCacheBytes() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (TCMalloc_GetMaxTotalThreadCacheBytes == nullptr) { + return -1; + } + + return TCMalloc_GetMaxTotalThreadCacheBytes(); +#else + return -1; +#endif +} + +void MallocExtension::SetMaxTotalThreadCacheBytes(int64_t value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (TCMalloc_SetMaxTotalThreadCacheBytes == nullptr) { + return; + } + + TCMalloc_SetMaxTotalThreadCacheBytes(value); +#else + (void) value; +#endif +} + +absl::optional MallocExtension::GetNumericProperty( + absl::string_view property) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetNumericProperty != nullptr) { + size_t value; + if (MallocExtension_Internal_GetNumericProperty(property.data(), + property.size(), &value)) { + return value; + } + } +#endif + return absl::nullopt; +} + +size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { + return nallocx(size, 0); +} + +absl::optional MallocExtension::GetAllocatedSize(const void* p) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if 
(MallocExtension_GetAllocatedSize != nullptr) { + return MallocExtension_GetAllocatedSize(p); + } +#endif + return absl::nullopt; +} + +MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetOwnership != nullptr) { + return MallocExtension_Internal_GetOwnership(p); + } +#endif + return MallocExtension::Ownership::kUnknown; +} + +std::map +MallocExtension::GetProperties() { + std::map ret; +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetProperties != nullptr) { + MallocExtension_Internal_GetProperties(&ret); + } +#endif + return ret; +} + +size_t MallocExtension::ReleaseCpuMemory(int cpu) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_ReleaseCpuMemory != nullptr) { + return MallocExtension_Internal_ReleaseCpuMemory(cpu); + } +#endif + return 0; +} + +} // namespace tcmalloc + +// Default implementation just returns size. The expectation is that +// the linked-in malloc implementation might provide an override of +// this weak function with a better implementation. +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE size_t nallocx(size_t size, + int) noexcept { + return size; +} + +// Default implementation just frees memory. The expectation is that the +// linked-in malloc implementation may provide an override with an +// implementation that uses this optimization. +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE void sdallocx(void* ptr, size_t, + int) noexcept { + free(ptr); +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new(size_t size) { + return {::operator new(size), size}; +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_nothrow(size_t size) noexcept { + void* p = ::operator new(size, std::nothrow); + return {p, p ? size : 0}; +} + +#if defined(_LIBCPP_VERSION) && defined(__cpp_aligned_new) + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_aligned(size_t size, + std::align_val_t alignment) { + return {::operator new(size, alignment), size}; +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_aligned_nothrow( + size_t size, std::align_val_t alignment) noexcept { + void* p = ::operator new(size, alignment, std::nothrow); + return {p, p ? size : 0}; +} + +#endif // _LIBCPP_VERSION && __cpp_aligned_new diff --git a/tcmalloc/malloc_extension.h b/tcmalloc/malloc_extension.h new file mode 100644 index 000000000..9c3a3f217 --- /dev/null +++ b/tcmalloc/malloc_extension.h @@ -0,0 +1,549 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file documents extensions supported by TCMalloc. These extensions +// provide hooks for both surfacing telemetric data about TCMalloc's usage and +// tuning the internal implementation of TCMalloc. 
The internal implementation +// functions use weak linkage, allowing an application to link against the +// extensions without always linking against TCMalloc. + +#ifndef TCMALLOC_MALLOC_EXTENSION_H_ +#define TCMALLOC_MALLOC_EXTENSION_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +class AllocationProfilingTokenAccessor; +class AllocationProfilingTokenBase; +class ProfileAccessor; +class ProfileBase; +} // namespace tcmalloc_internal + +enum class ProfileType { + // Approximation of current heap usage + kHeap, + + // Fragmentation report + kFragmentation, + + // Sample of objects that were live at a recent peak of total heap usage. The + // specifics of when exactly this profile is collected are subject to change. + kPeakHeap, + + // Sample of objects allocated from the start of allocation profiling until + // the profile was terminated with Stop(). + kAllocations, + + // Only present to prevent switch statements without a default clause so that + // we can extend this enumeration without breaking code. + kDoNotUse, +}; + +class Profile { + public: + Profile() = default; + Profile(Profile&&) = default; + Profile(const Profile&) = delete; + + ~Profile(); + + Profile& operator=(Profile&&) = default; + Profile& operator=(const Profile&) = delete; + + struct Sample { + static constexpr int kMaxStackDepth = 64; + + int64_t sum; + int64_t count; // Total added with this + + size_t requested_size; + size_t requested_alignment; + size_t allocated_size; + + int depth; + void* stack[kMaxStackDepth]; + }; + + void Iterate(absl::FunctionRef f) const; + + int64_t Period() const; + ProfileType Type() const; + + private: + explicit Profile(std::unique_ptr); + + std::unique_ptr impl_; + friend class tcmalloc_internal::ProfileAccessor; +}; + +class AddressRegion { + public: + AddressRegion() {} + virtual ~AddressRegion(); + + // Allocates at least size bytes of memory from this region, aligned with + // alignment. Returns a pair containing a pointer to the start the allocated + // memory and the actual size allocated. Returns {nullptr, 0} on failure. + // + // Alloc must return memory located within the address range given in the call + // to AddressRegionFactory::Create that created this AddressRegion. + virtual std::pair Alloc(size_t size, size_t alignment) = 0; +}; + +// Interface to a pluggable address region allocator. +class AddressRegionFactory { + public: + enum class UsageHint { + kNormal, // Normal usage. + kInfrequent, // Used less frequently than normal regions. + }; + + AddressRegionFactory() {} + virtual ~AddressRegionFactory(); + + // Returns an AddressRegion with the specified start address and size. hint + // indicates how the caller intends to use the returned region (helpful for + // deciding which regions to remap with hugepages, which regions should have + // pages prefaulted, etc.). The returned AddressRegion must never be deleted. + // + // The caller must have reserved size bytes of address space starting at + // start_addr with mmap(PROT_NONE) prior to calling this function (so it is + // safe for Create() to mmap(MAP_FIXED) over the specified address range). + // start_addr and size are always page-aligned. 
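To make the contract described above concrete, here is a hedged sketch of a minimal custom factory: Create() receives a PROT_NONE-reserved range and returns a region that bump-allocates out of it, making pages accessible as it goes. It assumes power-of-two, page-multiple sizes and alignments and omits most error handling; all names other than the TCMalloc types are illustrative.

```cpp
#include <sys/mman.h>

#include <cstddef>
#include <cstdint>
#include <new>
#include <utility>

#include "tcmalloc/malloc_extension.h"

class BumpRegion final : public tcmalloc::AddressRegion {
 public:
  BumpRegion(uintptr_t start, size_t size) : cur_(start), end_(start + size) {}

  std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override {
    uintptr_t p = (cur_ + alignment - 1) & ~(alignment - 1);  // align up
    if (p + size > end_) return {nullptr, 0};
    // The caller reserved this range with mmap(PROT_NONE); make the portion
    // we hand out readable and writable before returning it.
    if (mprotect(reinterpret_cast<void*>(p), size, PROT_READ | PROT_WRITE)) {
      return {nullptr, 0};
    }
    cur_ = p + size;
    return {reinterpret_cast<void*>(p), size};
  }

 private:
  uintptr_t cur_, end_;
};

class BumpRegionFactory final : public tcmalloc::AddressRegionFactory {
 public:
  tcmalloc::AddressRegion* Create(void* start, size_t size,
                                  UsageHint) override {
    // Regions are never deleted, so carve their metadata out of the
    // never-freed MallocInternal() arena.
    void* mem = MallocInternal(sizeof(BumpRegion));
    if (mem == nullptr) return nullptr;
    return new (mem) BumpRegion(reinterpret_cast<uintptr_t>(start), size);
  }
};
```

A factory along these lines would be installed with MallocExtension::SetRegionFactory(), usually delegating anything it does not want to handle to the factory previously returned by GetRegionFactory().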
+ virtual AddressRegion* Create(void* start_addr, size_t size, + UsageHint hint) = 0; + + // Gets a human-readable description of the current state of the allocator. + // + // The state is stored in the provided buffer. The number of bytes used (or + // would have been required, had the buffer been of sufficient size) is + // returned. + virtual size_t GetStats(absl::Span buffer); + + // Gets a description of the current state of the allocator in pbtxt format. + // + // The state is stored in the provided buffer. The number of bytes used (or + // would have been required, had the buffer been of sufficient size) is + // returned. + virtual size_t GetStatsInPbtxt(absl::Span buffer); + + // Returns the total number of bytes allocated by MallocInternal(). + static size_t InternalBytesAllocated(); + + protected: + // Dynamically allocates memory for use by AddressRegionFactory. Particularly + // useful for creating AddressRegions inside Create(). + // + // This memory is never freed, so allocate sparingly. + static void* MallocInternal(size_t size); +}; + +class MallocExtension { + public: + + // Gets a human readable description of the current state of the malloc data + // structures. + // + // See /tcmalloc/docs/stats.md for how to interpret these statistics. + static std::string GetStats(); + + // ------------------------------------------------------------------- + // Control operations for getting malloc implementation specific parameters. + // Some currently useful properties: + // + // generic + // ------- + // "generic.current_allocated_bytes" + // Number of bytes currently allocated by application + // + // "generic.heap_size" + // Number of bytes in the heap == + // current_allocated_bytes + + // fragmentation + + // freed (but not released to OS) memory regions + // + // tcmalloc + // -------- + // "tcmalloc.max_total_thread_cache_bytes" + // Upper limit on total number of bytes stored across all + // per-thread caches. Default: 16MB. + // + // "tcmalloc.current_total_thread_cache_bytes" + // Number of bytes used across all thread caches. + // + // "tcmalloc.pageheap_free_bytes" + // Number of bytes in free, mapped pages in page heap. These + // bytes can be used to fulfill allocation requests. They + // always count towards virtual memory usage, and unless the + // underlying memory is swapped out by the OS, they also count + // towards physical memory usage. + // + // "tcmalloc.pageheap_unmapped_bytes" + // Number of bytes in free, unmapped pages in page heap. + // These are bytes that have been released back to the OS, + // possibly by one of the MallocExtension "Release" calls. + // They can be used to fulfill allocation requests, but + // typically incur a page fault. They always count towards + // virtual memory usage, and depending on the OS, typically + // do not count towards physical memory usage. + // + // "tcmalloc.per_cpu_caches_active" + // Whether tcmalloc is using per-CPU caches (1 or 0 respectively). + // ------------------------------------------------------------------- + + // Gets the named property's value or a nullopt if the property is not valid. + static absl::optional GetNumericProperty(absl::string_view property); + + // Marks the current thread as "idle". This function may optionally be called + // by threads as a hint to the malloc implementation that any thread-specific + // resources should be released. Note: this may be an expensive function, so + // it should not be called too often. 
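+  //
+  // A typical pattern for a long-lived worker thread might look like the
+  // following sketch (WaitForWork() is a hypothetical blocking call):
+  //
+  //   MallocExtension::MarkThreadIdle();  // release thread-local resources
+  //   WaitForWork();                      // block; avoid allocating here
+  //   MallocExtension::MarkThreadBusy();  // about to allocate again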
+ // + // Also, if the code that calls this function will go to sleep for a while, it + // should take care to not allocate anything between the call to this function + // and the beginning of the sleep. + static void MarkThreadIdle(); + + // Marks the current thread as "busy". This function should be called after + // MarkThreadIdle() if the thread will now do more work. If this method is + // not called, performance may suffer. + static void MarkThreadBusy(); + + // Attempts to free any resources associated with cpu (in the sense of + // only being usable from that CPU.) Returns the number of bytes previously + // assigned to "cpu" that were freed. Safe to call from any processor, not + // just . + static size_t ReleaseCpuMemory(int cpu); + + // Gets the region factory used by the malloc extension instance. Returns null + // for malloc implementations that do not support pluggable region factories. + static AddressRegionFactory* GetRegionFactory(); + + // Sets the region factory to the specified. + // + // Users could register their own region factories by doing: + // factory = new MyOwnRegionFactory(); + // MallocExtension::SetRegionFactory(factory); + // + // It's up to users whether to fall back (recommended) to the default region + // factory (use GetRegionFactory() above) or not. The caller is responsible to + // any necessary locking. + static void SetRegionFactory(AddressRegionFactory* a); + + // Tries to release num_bytes of free memory back to the OS for reuse. + // + // This request may not be completely honored if: + // * The underlying malloc implementation does not support releasing memory to + // the OS. + // * There are not at least num_bytes of free memory cached, or free memory is + // fragmented in ways that keep it from being returned to the OS. + // + // Returning memory to the OS can hurt performance in two ways: + // * Parts of huge pages may be free and returning them to the OS requires + // breaking up the huge page they are located on. This can slow accesses to + // still-allocated memory due to increased TLB pressure for the working set. + // * If the memory is ultimately needed again, pages will need to be faulted + // back in. + static void ReleaseMemoryToSystem(size_t num_bytes); + + struct MemoryLimit { + // Make a best effort attempt to prevent more than limit bytes of memory + // from being allocated by the system. In particular, if satisfying a given + // malloc call would require passing this limit, release as much memory to + // the OS as needed to stay under it if possible. + // + // If hard is set, crash if returning memory is unable to get below the + // limit. + // + // Note: limit=SIZE_T_MAX implies no limit. + size_t limit = std::numeric_limits::max(); + bool hard = false; + }; + + static MemoryLimit GetMemoryLimit(); + static void SetMemoryLimit(const MemoryLimit& limit); + + // Gets the sampling rate. Returns a value < 0 if unknown. + static int64_t GetProfileSamplingRate(); + // Sets the sampling rate for heap profiles. TCMalloc samples approximately + // every rate bytes allocated. + static void SetProfileSamplingRate(int64_t rate); + + // Gets the guarded sampling rate. Returns a value < 0 if unknown. + static int64_t GetGuardedSamplingRate(); + // Sets the guarded sampling rate for sampled allocations. Guarded samples + // provide probablistic protections against buffer underflow, overflow, and + // use-after-free. 
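+  //
+  // Illustrative sketch only (the scaling factor is arbitrary, not a
+  // recommendation): derive a guarded rate from the regular profile sampling
+  // rate, then enable guarded sampling:
+  //
+  //   int64_t rate = MallocExtension::GetProfileSamplingRate();
+  //   if (rate > 0) {
+  //     MallocExtension::SetGuardedSamplingRate(100 * rate);
+  //     MallocExtension::ActivateGuardedSampling();
+  //   }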
+ static void SetGuardedSamplingRate(int64_t rate); + + // Switches TCMalloc to guard sampled allocations for underflow, overflow, and + // use-after-free according to the guarded sample parameter value. + static void ActivateGuardedSampling(); + + // Gets whether TCMalloc is using per-CPU caches. + static bool PerCpuCachesActive(); + + // Gets the current maximum cache size per CPU cache. + static int32_t GetMaxPerCpuCacheSize(); + // Sets the maximum cache size per CPU cache. This is a per-core limit. + static void SetMaxPerCpuCacheSize(int32_t value); + + // Gets the current maximum thread cache. + static int64_t GetMaxTotalThreadCacheBytes(); + // Sets the maximum thread cache size. This is a whole-process limit. + static void SetMaxTotalThreadCacheBytes(int64_t value); + + // Returns the estimated number of bytes that will be allocated for a request + // of "size" bytes. This is an estimate: an allocation of "size" bytes may + // reserve more bytes, but will never reserve fewer. + static size_t GetEstimatedAllocatedSize(size_t size); + + // Returns the actual number N of bytes reserved by tcmalloc for the pointer + // p. This number may be equal to or greater than the number of bytes + // requested when p was allocated. + // + // This function is just useful for statistics collection. The client must + // *not* read or write from the extra bytes that are indicated by this call. + // + // Example, suppose the client gets memory by calling + // p = malloc(10) + // and GetAllocatedSize(p) returns 16. The client must only use the first 10 + // bytes p[0..9], and not attempt to read or write p[10..15]. + // + // p must have been allocated by TCMalloc and must not be an interior pointer + // -- that is, must be exactly the pointer returned to by malloc() et al., not + // some offset from that -- and should not have been freed yet. p may be + // null. + static absl::optional GetAllocatedSize(const void* p); + + // Returns + // * kOwned if TCMalloc allocated the memory pointed to by p, or + // * kNotOwned if allocated elsewhere or p is null. + // + // REQUIRES: p must be a value returned from a previous call to malloc(), + // calloc(), realloc(), memalign(), posix_memalign(), valloc(), pvalloc(), + // new, or new[], and must refer to memory that is currently allocated (so, + // for instance, you should not pass in a pointer after having called free() + // on it). + enum class Ownership { + kUnknown = 0, + kOwned, + kNotOwned + }; + static Ownership GetOwnership(const void* p); + + // Type used by GetProperties. See comment on GetProperties. + struct Property { + size_t value; + }; + + // Returns detailed statistics about the state of TCMalloc. The map is keyed + // by the name of the statistic. + // + // Common across malloc implementations: + // generic.bytes_in_use_by_app -- Bytes currently in use by application + // generic.physical_memory_used -- Overall (including malloc internals) + // generic.virtual_memory_used -- Overall (including malloc internals) + // + // Tcmalloc specific properties + // tcmalloc.cpu_free -- Bytes in per-cpu free-lists + // tcmalloc.thread_cache_free -- Bytes in per-thread free-lists + // tcmalloc.transfer_cache -- Bytes in cross-thread transfer caches + // tcmalloc.central_cache_free -- Bytes in central cache + // tcmalloc.page_heap_free -- Bytes in page heap + // tcmalloc.page_heap_unmapped -- Bytes in page heap (no backing phys. 
mem) + // tcmalloc.metadata_bytes -- Used by internal data structures + // tcmalloc.thread_cache_count -- Number of thread caches in use + // tcmalloc.experiment.NAME -- Experiment NAME is running if 1 + static std::map GetProperties(); + + static Profile SnapshotCurrent(tcmalloc::ProfileType type); + + // AllocationProfilingToken tracks an active profiling session started with + // StartAllocationProfiling. Profiling continues until Stop() is called. + class AllocationProfilingToken { + public: + AllocationProfilingToken() = default; + AllocationProfilingToken(AllocationProfilingToken&&) = default; + AllocationProfilingToken(const AllocationProfilingToken&) = delete; + ~AllocationProfilingToken(); + + AllocationProfilingToken& operator=(AllocationProfilingToken&&) = default; + AllocationProfilingToken& operator=(const AllocationProfilingToken&) = + delete; + + // Finish the recording started by the corresponding call to + // StartAllocationProfile, and return samples of calls to each function. If + // it is called more than once, subsequent calls will return an empty + // profile. + Profile Stop() &&; + + private: + explicit AllocationProfilingToken( + std::unique_ptr); + + std::unique_ptr impl_; + friend class tcmalloc_internal::AllocationProfilingTokenAccessor; + }; + + // Start recording a sample of allocation and deallocation calls. Returns + // null if the implementation does not support profiling. + static AllocationProfilingToken StartAllocationProfiling(); +}; + +} // namespace tcmalloc + +// The nallocx function allocates no memory, but it performs the same size +// computation as the malloc function, and returns the real size of the +// allocation that would result from the equivalent malloc function call. +// Default weak implementation returns size unchanged, but tcmalloc overrides it +// and returns rounded up size. See the following link for details: +// http://www.unix.com/man-page/freebsd/3/nallocx/ +extern "C" size_t nallocx(size_t size, int flags) noexcept; + +// The sdallocx function deallocates memory allocated by malloc or memalign. It +// takes a size parameter to pass the original allocation size. +// +// The default weak implementation calls free(), but TCMalloc overrides it and +// uses the size to improve deallocation performance. +extern "C" void sdallocx(void* ptr, size_t size, int flags) noexcept; + +namespace tcmalloc { + +// Pointer / capacity information as returned by +// tcmalloc_size_returning_operator_new(). See +// tcmalloc_size_returning_operator_new() for more information. +struct sized_ptr_t { + void* p; + size_t n; +}; + +} // namespace tcmalloc + +// Allocates memory of at least the requested size. +// +// Returns a `sized_ptr_t` struct holding the allocated pointer, and the +// capacity of the allocated memory, which may be larger than the requested +// size. +// +// The returned pointer follows the alignment requirements of the standard new +// operator. This function will terminate on failure, except for the APIs +// accepting the std::nothrow parameter which will return {nullptr, 0} on +// failure. +// +// The returned pointer must be freed calling the matching ::operator delete. +// +// If a sized operator delete operator is invoked, then the 'size' parameter +// passed to delete must be greater or equal to the original requested size, and +// less than or equal to the capacity of the allocated memory as returned by the +// `tcmalloc_size_returning_operator_new` method. 
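+//
+// Illustrative sketch of the intended pairing (the request size 37 is
+// arbitrary):
+//
+//   tcmalloc::sized_ptr_t res = tcmalloc_size_returning_operator_new(37);
+//   // res.p points to at least 37 usable bytes; res.n is the actual capacity.
+//   ...
+//   ::operator delete(res.p, res.n);  // any size in [37, res.n] is acceptable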
+// +// If neither the original size or capacity is known, then the non-sized +// operator delete can be invoked, however, this should be avoided, as this is +// substantially less efficient. +// +// The default weak implementation allocates the memory using the corresponding +// (matching) ::operator new(size_t, ...). +// +// This is a prototype API for the extension to C++ "size feedback in operator +// new" proposal: +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0901r5.html +extern "C" { +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new(size_t size); +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( + size_t size) noexcept; + +// Aligned size returning new is only supported for libc++ because of issues +// with libstdcxx.so linkage. See http://b/110969867 for background. +#if defined(_LIBCPP_VERSION) && defined(__cpp_aligned_new) + +// Identical to `tcmalloc_size_returning_operator_new` except that the returned +// memory is aligned according to the `alignment` argument. +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned( + size_t size, std::align_val_t alignment); +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned_nothrow( + size_t size, std::align_val_t alignment) noexcept; + +#endif // _LIBCPP_VERSION && __cpp_aligned_new + +} // extern "C" + +#ifndef MALLOCX_LG_ALIGN +#define MALLOCX_LG_ALIGN(la) (la) +#endif + +namespace tcmalloc { +namespace tcmalloc_internal { + +// AllocationProfilingTokenBase tracks an on-going profiling session of sampled +// allocations. The session ends when Stop() is called. +// +// This decouples the implementation details (of TCMalloc) from the interface, +// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided +// while allowing the library to compile and link. +class AllocationProfilingTokenBase { + public: + virtual ~AllocationProfilingTokenBase() = default; + + // Finish recording started during construction of this object. + // + // After the first call, Stop() will return an empty profile. + virtual Profile Stop() && = 0; +}; + +// ProfileBase contains a profile of allocations. +// +// This decouples the implementation details (of TCMalloc) from the interface, +// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided +// while allowing the library to compile and link. +class ProfileBase { + public: + virtual ~ProfileBase() = default; + + // For each sample in the profile, Iterate invokes the callback f on the + // sample. + virtual void Iterate( + absl::FunctionRef f) const = 0; + + // The approximate interval between recorded samples of the event of interest. + // A period of 1 means every sample was recorded. + virtual int64_t Period() const = 0; + + // The type of profile (live objects, allocated, etc.). + virtual ProfileType Type() const = 0; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_MALLOC_EXTENSION_H_ diff --git a/tcmalloc/malloc_extension_system_malloc_test.cc b/tcmalloc/malloc_extension_system_malloc_test.cc new file mode 100644 index 000000000..81e7afa01 --- /dev/null +++ b/tcmalloc/malloc_extension_system_malloc_test.cc @@ -0,0 +1,87 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// These tests assume TCMalloc is not linked in, and therefore the features +// exposed by MallocExtension should be no-ops, but otherwise safe to call. + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +TEST(MallocExtension, SnapshotCurrentIsEmpty) { + // Allocate memory to use the allocator. + absl::BitGen gen; + int bytes_remaining = 1 << 24; + std::vector ptrs; + + while (bytes_remaining > 0) { + int size = absl::LogUniform(gen, 0, 1 << 20); + ptrs.push_back(::operator new(size)); + bytes_remaining -= size; + } + + // All of the profiles should be empty. + ProfileType types[] = { + ProfileType::kHeap, + ProfileType::kFragmentation, ProfileType::kPeakHeap, + ProfileType::kAllocations, + }; + + for (auto t : types) { + SCOPED_TRACE(static_cast(t)); + + Profile p = MallocExtension::SnapshotCurrent(t); + int samples = 0; + p.Iterate([&](const Profile::Sample&) { samples++; }); + + EXPECT_EQ(samples, 0); + } + + for (void* ptr : ptrs) { + ::operator delete(ptr); + } +} + +TEST(MallocExtension, AllocationProfile) { + auto token = MallocExtension::StartAllocationProfiling(); + + // Allocate memory to use the allocator. + absl::BitGen gen; + int bytes_remaining = 1 << 24; + std::vector ptrs; + + while (bytes_remaining > 0) { + int size = absl::LogUniform(gen, 0, 1 << 20); + ptrs.push_back(::operator new(size)); + bytes_remaining -= size; + } + + // Finish profiling and verify the profile is empty. + Profile p = std::move(token).Stop(); + int samples = 0; + p.Iterate([&](const Profile::Sample&) { samples++; }); + + EXPECT_EQ(samples, 0); + + for (void* ptr : ptrs) { + ::operator delete(ptr); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/malloc_extension_test.cc b/tcmalloc/malloc_extension_test.cc new file mode 100644 index 000000000..a8fdc1e76 --- /dev/null +++ b/tcmalloc/malloc_extension_test.cc @@ -0,0 +1,39 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test for TCMalloc implementation of MallocExtension + +#include "tcmalloc/malloc_extension.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace { + +TEST(MallocExtension, Properties) { + // Verify that every property under GetProperties also works with + // GetNumericProperty. 
+ const auto properties = MallocExtension::GetProperties(); + for (const auto& property : properties) { + absl::optional scalar = + MallocExtension::GetNumericProperty(property.first); + // The value of the property itself may have changed, so just check that it + // is present. + EXPECT_THAT(scalar, testing::Ne(absl::nullopt)) << property.first; + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/noruntime_size_classes.cc b/tcmalloc/noruntime_size_classes.cc new file mode 100644 index 000000000..7bd35b628 --- /dev/null +++ b/tcmalloc/noruntime_size_classes.cc @@ -0,0 +1,29 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/runtime_size_classes.h" +#include "tcmalloc/size_class_info.h" + +namespace tcmalloc { + +// Default implementation doesn't load runtime size classes. +// To enable runtime size classes, link with :runtime_size_classes. +// This is in a separate library so that it doesn't get inlined inside common.cc +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE int MaybeSizeClassesFromEnv( + int max_size, int max_classes, SizeClassInfo* parsed) { + return -1; +} + +} // namespace tcmalloc diff --git a/tcmalloc/page_allocator.cc b/tcmalloc/page_allocator.cc new file mode 100644 index 000000000..db5d2661b --- /dev/null +++ b/tcmalloc/page_allocator.cc @@ -0,0 +1,154 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/page_allocator.h" + +#include + +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_page_aware_allocator.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +int ABSL_ATTRIBUTE_WEAK default_want_hpaa(); + +bool decide_want_hpaa() { +#if defined(__PPC64__) && defined(TCMALLOC_SMALL_BUT_SLOW) + // In small-but-slow, we choose a kMinSystemAlloc size that smaller than the + // hugepage size on PPC. If this situation changes, this static_assert will + // begin failing. 
+ static_assert(kHugePageSize > kMinSystemAlloc, + "HPAA may now support PPC, update tests"); + return false; +#endif + + const char *e = + tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); + if (e) { + if (e[0] == '0') return false; + if (e[0] == '1') return true; + if (e[0] == '2') return true; + Log(kCrash, __FILE__, __LINE__, "bad env var", e); + return false; + } + + if (default_want_hpaa != nullptr) { + int default_hpaa = default_want_hpaa(); + if (default_hpaa != 0) { + return default_hpaa > 0; + } + } + +#if !defined(__x86_64__) || defined(TCMALLOC_SMALL_BUT_SLOW) + // Our testing of HPAA has focused on x86, and it's neither small nor slow :) + return false; +#else + return true; +#endif +} + +bool want_hpaa() { + static bool use = decide_want_hpaa(); + + return use; +} + +PageAllocator::PageAllocator() { + const bool kUseHPAA = want_hpaa(); + if (kUseHPAA) { + untagged_impl_ = + new (&choices_[0].hpaa) HugePageAwareAllocator(/*tagged=*/false); + tagged_impl_ = + new (&choices_[1].hpaa) HugePageAwareAllocator(/*tagged=*/true); + alg_ = HPAA; + } else { + untagged_impl_ = new (&choices_[0].ph) PageHeap(/*tagged=*/false); + tagged_impl_ = new (&choices_[1].ph) PageHeap(/*tagged=*/true); + alg_ = PAGE_HEAP; + } +} + +void PageAllocator::ShrinkToUsageLimit() { + if (limit_ == std::numeric_limits::max()) { + return; + } + BackingStats s = stats(); + size_t backed = s.system_bytes - s.unmapped_bytes + Static::metadata_bytes(); + if (backed <= limit_) { + // We're already fine. + return; + } + + limit_hits_++; + const size_t overage = backed - limit_; + const Length pages = (overage + kPageSize - 1) / kPageSize; + if (ShrinkHardBy(pages)) { + return; + } + + // We're still not below limit. + if (limit_is_hard_) { + limit_ = std::numeric_limits::max(); + Log(kCrash, __FILE__, __LINE__, + "Hit hard tcmalloc heap limit (e.g. --tcmalloc_heap_size_hard_limit). " + "Aborting.\nIt was most likely set to catch " + "allocations that would crash the process anyway. " + ); + } + + // Print logs once. + static bool warned = false; + if (warned) return; + warned = true; + Log(kLogWithStack, __FILE__, __LINE__, "Couldn't respect usage limit of ", + limit_, "and OOM is likely to follow."); +} + +bool PageAllocator::ShrinkHardBy(Length pages) { + Length ret = ReleaseAtLeastNPages(pages); + if (alg_ == HPAA) { + if (pages <= ret) { + // We released target amount. + return true; + } + + // At this point, we have no choice but to break up hugepages. + // However, if the client has turned off subrelease, and is using hard + // limits, then respect desire to do no subrelease ever. + if (limit_is_hard_ && !Parameters::hpaa_subrelease()) return false; + + static bool warned_hugepages = false; + if (!warned_hugepages) { + Log(kLogWithStack, __FILE__, __LINE__, "Couldn't respect usage limit of ", + limit_, "without breaking hugepages - performance will drop"); + warned_hugepages = true; + } + ret += static_cast(untagged_impl_) + ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret); + if (ret < pages) { + ret += static_cast(tagged_impl_) + ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret); + } + } + // Return "true", if we got back under the limit. 
+ return (pages <= ret); +} + +} // namespace tcmalloc diff --git a/tcmalloc/page_allocator.h b/tcmalloc/page_allocator.h new file mode 100644 index 000000000..602335f5a --- /dev/null +++ b/tcmalloc/page_allocator.h @@ -0,0 +1,198 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PAGE_ALLOCATOR_H_ +#define TCMALLOC_PAGE_ALLOCATOR_H_ + +#include +#include + +#include + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_page_aware_allocator.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_allocator_interface.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +class PageAllocator { + public: + PageAllocator(); + ~PageAllocator() = delete; + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + // Any address in the returned Span is guaranteed to satisfy + // IsTaggedMemory(addr) == "tagged". + Span* New(Length n, bool tagged) LOCKS_EXCLUDED(pageheap_lock); + + // As New, but the returned span is aligned to a -page boundary. + // must be a power of two. + Span* NewAligned(Length n, Length align, bool tagged) + LOCKS_EXCLUDED(pageheap_lock); + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() with the same value of + // "tagged" and has not yet been deleted. + void Delete(Span* span, bool tagged) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + BackingStats stats() const EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void GetSmallSpanStats(SmallSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void GetLargeSpanStats(LargeSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Prints stats about the page heap to *out. + void Print(TCMalloc_Printer* out, bool tagged) LOCKS_EXCLUDED(pageheap_lock); + void PrintInPbtxt(PbtxtRegion* region, bool tagged) + LOCKS_EXCLUDED(pageheap_lock); + + void set_limit(size_t limit, bool is_hard) LOCKS_EXCLUDED(pageheap_lock); + std::pair limit() const LOCKS_EXCLUDED(pageheap_lock); + int64_t limit_hits() const LOCKS_EXCLUDED(pageheap_lock); + + // If we have a usage limit set, ensure we're not violating it from our latest + // allocation. 
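+  //
+  // Sketch of the intended call pattern, mirroring how the page heap in this
+  // change uses it after satisfying an allocation (with pageheap_lock held):
+  //
+  //   Span* result = AllocateSpan(n, &from_returned);
+  //   if (result) Static::page_allocator()->ShrinkToUsageLimit();
+  //
+  // Limits themselves are configured via set_limit() above.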
+ void ShrinkToUsageLimit() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + const PageAllocInfo& info(bool tagged) const + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + enum Algorithm { + PAGE_HEAP = 0, + HPAA = 1, + }; + + Algorithm algorithm() const { return alg_; } + + private: + bool ShrinkHardBy(Length pages) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + PageAllocatorInterface* impl(bool tagged) const; + + union Choices { + Choices() : dummy(0) {} + ~Choices() {} + int dummy; + PageHeap ph; + HugePageAwareAllocator hpaa; + } choices_[2]; + PageAllocatorInterface* untagged_impl_; + PageAllocatorInterface* tagged_impl_; + Algorithm alg_; + + bool limit_is_hard_{false}; + // Max size of backed spans we will attempt to maintain. + size_t limit_{std::numeric_limits::max()}; + // The number of times the limit has been hit. + int64_t limit_hits_{0}; +}; + +inline PageAllocatorInterface* PageAllocator::impl(bool tagged) const { + return tagged ? tagged_impl_ : untagged_impl_; +} + +inline Span* PageAllocator::New(Length n, bool tagged) { + return impl(tagged)->New(n); +} + +inline Span* PageAllocator::NewAligned(Length n, Length align, bool tagged) { + return impl(tagged)->NewAligned(n, align); +} + +inline void PageAllocator::Delete(Span* span, bool tagged) { + impl(tagged)->Delete(span); +} + +inline BackingStats PageAllocator::stats() const { + return untagged_impl_->stats() + tagged_impl_->stats(); +} + +inline void PageAllocator::GetSmallSpanStats(SmallSpanStats* result) { + SmallSpanStats untagged, tagged; + untagged_impl_->GetSmallSpanStats(&untagged); + tagged_impl_->GetSmallSpanStats(&tagged); + *result = untagged + tagged; +} + +inline void PageAllocator::GetLargeSpanStats(LargeSpanStats* result) { + LargeSpanStats untagged, tagged; + untagged_impl_->GetLargeSpanStats(&untagged); + tagged_impl_->GetLargeSpanStats(&tagged); + *result = untagged + tagged; +} + +inline Length PageAllocator::ReleaseAtLeastNPages(Length num_pages) { + Length released = untagged_impl_->ReleaseAtLeastNPages(num_pages); + if (released < num_pages) { + released += tagged_impl_->ReleaseAtLeastNPages(num_pages - released); + } + return released; +} + +inline void PageAllocator::Print(TCMalloc_Printer* out, bool tagged) { + if (tagged) { + out->printf("\n>>>>>>> Begin tagged page allocator <<<<<<<\n"); + } + impl(tagged)->Print(out); + if (tagged) { + out->printf(">>>>>>> End tagged page allocator <<<<<<<\n"); + } +} + +inline void PageAllocator::PrintInPbtxt(PbtxtRegion* region, bool tagged) { + PbtxtRegion pa = region->CreateSubRegion("page_allocator"); + pa.PrintBool("tagged", tagged); + impl(tagged)->PrintInPbtxt(&pa); +} + +inline void PageAllocator::set_limit(size_t limit, bool is_hard) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + limit_ = limit; + limit_is_hard_ = is_hard; +} + +inline std::pair PageAllocator::limit() const { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return {limit_, limit_is_hard_}; +} + +inline int64_t PageAllocator::limit_hits() const { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return limit_hits_; +} + +inline const PageAllocInfo& PageAllocator::info(bool tagged) const { + return impl(tagged)->info(); +} + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGE_ALLOCATOR_H_ diff --git a/tcmalloc/page_allocator_interface.cc b/tcmalloc/page_allocator_interface.cc new file mode 100644 index 000000000..de1bf0bf5 --- /dev/null +++ b/tcmalloc/page_allocator_interface.cc @@ -0,0 +1,75 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under 
the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/page_allocator_interface.h" + +#include +#include +#include +#include +#include +#include + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +using tcmalloc::tcmalloc_internal::signal_safe_open; +using tcmalloc::tcmalloc_internal::thread_safe_getenv; + +static int OpenLog(bool tagged) { + const char *fname = tagged + ? thread_safe_getenv("TCMALLOC_TAGGED_PAGE_LOG_FILE") + : thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE"); + if (!fname) return -1; + + if (getuid() != geteuid() || getgid() != getegid()) { + Log(kLog, __FILE__, __LINE__, "Cannot take a pagetrace from setuid binary"); + return -1; + } + char buf[PATH_MAX]; + // Tag file with PID - handles forking children much better. + int pid = getpid(); + // Blaze tests can output here for recovery of the output file + const char *test_dir = thread_safe_getenv("TEST_UNDECLARED_OUTPUTS_DIR"); + if (test_dir) { + snprintf(buf, sizeof(buf), "%s/%s.%d", test_dir, fname, pid); + } else { + snprintf(buf, sizeof(buf), "%s.%d", fname, pid); + } + int fd = + signal_safe_open(buf, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + + if (fd < 0) { + Log(kCrash, __FILE__, __LINE__, fd, errno, fname); + } + + return fd; +} + +PageAllocatorInterface::PageAllocatorInterface(const char *label, bool tagged) + : PageAllocatorInterface(label, Static::pagemap(), tagged) {} + +PageAllocatorInterface::PageAllocatorInterface(const char *label, PageMap *map, + bool tagged) + : info_(label, OpenLog(tagged)), pagemap_(map), tagged_(tagged) {} + +PageAllocatorInterface::~PageAllocatorInterface() { + // This is part of tcmalloc statics - they must be immortal. + Log(kCrash, __FILE__, __LINE__, "should never destroy this"); +} + +} // namespace tcmalloc diff --git a/tcmalloc/page_allocator_interface.h b/tcmalloc/page_allocator_interface.h new file mode 100644 index 000000000..54d213b47 --- /dev/null +++ b/tcmalloc/page_allocator_interface.h @@ -0,0 +1,92 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_ +#define TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_ + +#include + +#include +#include + +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +class PageMap; + +class PageAllocatorInterface { + public: + PageAllocatorInterface(const char* label, bool tagged); + // For testing: use a non-default pagemap. + PageAllocatorInterface(const char* label, PageMap* map, bool tagged); + virtual ~PageAllocatorInterface(); + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + virtual Span* New(Length n) LOCKS_EXCLUDED(pageheap_lock) = 0; + + // As New, but the returned span is aligned to a -page boundary. + // must be a power of two. + virtual Span* NewAligned(Length n, Length align) + LOCKS_EXCLUDED(pageheap_lock) = 0; + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. + virtual void Delete(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + virtual BackingStats stats() const + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + virtual void GetSmallSpanStats(SmallSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + virtual void GetLargeSpanStats(LargeSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + virtual Length ReleaseAtLeastNPages(Length num_pages) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + // Prints stats about the page heap to *out. + virtual void Print(TCMalloc_Printer* out) LOCKS_EXCLUDED(pageheap_lock) = 0; + + // Prints stats about the page heap in pbtxt format. + // + // TODO(b/130249686): Remove this one and make `Print` print in pbtxt. + virtual void PrintInPbtxt(PbtxtRegion* region) + LOCKS_EXCLUDED(pageheap_lock) = 0; + + const PageAllocInfo& info() const { return info_; } + + protected: + PageAllocInfo info_ GUARDED_BY(pageheap_lock); + PageMap* pagemap_; + + bool tagged_; // Whether this heap manages tagged or untagged memory. +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_ diff --git a/tcmalloc/page_allocator_test.cc b/tcmalloc/page_allocator_test.cc new file mode 100644 index 000000000..cf31b81f0 --- /dev/null +++ b/tcmalloc/page_allocator_test.cc @@ -0,0 +1,142 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Tests for infrastructure common to page allocator implementations +// (stats and logging.) 
+#include "tcmalloc/page_allocator.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/page_allocator_test_util.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { +namespace { + +class PageAllocatorTest : public testing::Test { + protected: + // Not in constructor so subclasses can mess about with environment + // variables. + void SetUp() override { + // If this test is not linked against TCMalloc, the global arena used for + // metadata will not be initialized. + Static::InitIfNecessary(); + + before_ = MallocExtension::GetRegionFactory(); + extra_ = new ExtraRegionFactory(before_); + MallocExtension::SetRegionFactory(extra_); + void *p = malloc(sizeof(PageAllocator)); + allocator_ = new (p) PageAllocator; + } + void TearDown() override { + MallocExtension::SetRegionFactory(before_); + delete extra_; + free(allocator_); + } + + Span *New(Length n) { return allocator_->New(n, /*tagged=*/false); } + Span *NewAligned(Length n, Length align) { + return allocator_->NewAligned(n, align, /*tagged=*/false); + } + void Delete(Span *s) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocator_->Delete(s, /*tagged=*/false); + } + + Length Release(Length n) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return allocator_->ReleaseAtLeastNPages(n); + } + + std::string Print() { + std::vector buf(1024 * 1024); + TCMalloc_Printer out(&buf[0], buf.size()); + allocator_->Print(&out, /*tagged=*/false); + + return std::string(&buf[0]); + } + + PageAllocator *allocator_; + ExtraRegionFactory *extra_; + AddressRegionFactory *before_; +}; + +// We've already tested in stats_test that PageAllocInfo keeps good stats; +// here we're just testing that we make the proper Record calls. +TEST_F(PageAllocatorTest, Record) { + for (int i = 0; i < 15; ++i) { + Delete(New(1)); + } + + std::vector spans; + for (int i = 0; i < 20; ++i) { + spans.push_back(New(2)); + } + + for (int i = 0; i < 25; ++i) { + Delete(NewAligned(3, 2)); + } + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + auto info = allocator_->info(/*tagged=*/false); + + CHECK_CONDITION(15 == info.counts_for(1).nalloc); + CHECK_CONDITION(15 == info.counts_for(1).nfree); + + CHECK_CONDITION(20 == info.counts_for(2).nalloc); + CHECK_CONDITION(0 == info.counts_for(2).nfree); + + CHECK_CONDITION(25 == info.counts_for(3).nalloc); + CHECK_CONDITION(25 == info.counts_for(3).nfree); + + for (Length i = 4; i <= kMaxPages; ++i) { + CHECK_CONDITION(0 == info.counts_for(i).nalloc); + CHECK_CONDITION(0 == info.counts_for(i).nfree); + } + + const Length absurd = Length{1} << (kAddressBits - 1 - kPageShift); + for (Length i = kMaxPages + 1; i < absurd; i *= 2) { + CHECK_CONDITION(0 == info.counts_for(i).nalloc); + CHECK_CONDITION(0 == info.counts_for(i).nfree); + } + } + for (auto s : spans) Delete(s); +} + +// And that we call the print method properly. 
+TEST_F(PageAllocatorTest, PrintIt) { + Delete(New(1)); + std::string output = Print(); + EXPECT_THAT(output, testing::ContainsRegex("stats on allocation sizes")); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/page_allocator_test_util.h b/tcmalloc/page_allocator_test_util.h new file mode 100644 index 000000000..31c180535 --- /dev/null +++ b/tcmalloc/page_allocator_test_util.h @@ -0,0 +1,75 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_ +#define TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_ + +#include +#include + +#include "tcmalloc/malloc_extension.h" + +// TODO(b/116000878): Remove dependency on common.h if it causes ODR issues. +#include "tcmalloc/common.h" + +namespace tcmalloc { + +// AddressRegion that adds some padding on either side of each +// allocation. This prevents multiple PageAllocators in the system +// from noticing one another's presence in the pagemap. +class ExtraRegion : public AddressRegion { + public: + explicit ExtraRegion(AddressRegion *under) : under_(under) {} + + std::pair Alloc(size_t size, size_t alignment) override { + size_t big = size + alignment + alignment; + // Can't pad if allocation is within 2 * alignment of region size. + if (big > kMinMmapAlloc) { + return under_->Alloc(size, alignment); + } + void *ptr; + size_t actual_size; + std::tie(ptr, actual_size) = under_->Alloc(big, alignment); + if (!ptr) return {nullptr, 0}; + actual_size = actual_size - alignment * 2; + return {static_cast(ptr) + alignment, actual_size}; + } + + private: + AddressRegion *under_; +}; + +class ExtraRegionFactory : public AddressRegionFactory { + public: + explicit ExtraRegionFactory(AddressRegionFactory *under) : under_(under) {} + + AddressRegion *Create(void *start, size_t size, UsageHint hint) override { + AddressRegion *underlying_region = under_->Create(start, size, hint); + CHECK_CONDITION(underlying_region); + void *region_space = MallocInternal(sizeof(ExtraRegion)); + CHECK_CONDITION(region_space); + return new (region_space) ExtraRegion(underlying_region); + } + + size_t GetStats(absl::Span buffer) override { + return under_->GetStats(buffer); + } + + private: + AddressRegionFactory *under_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_ diff --git a/tcmalloc/page_heap.cc b/tcmalloc/page_heap.cc new file mode 100644 index 000000000..ccb0565fa --- /dev/null +++ b/tcmalloc/page_heap.cc @@ -0,0 +1,534 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/page_heap.h" + +#include + +#include + +#include "absl/base/internal/cycleclock.h" +#include "absl/base/internal/spinlock.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/system-alloc.h" + +namespace tcmalloc { + +// Helper function to record span address into pageheap +void PageHeap::RecordSpan(Span* span) { + pagemap_->Set(span->first_page(), span); + if (span->num_pages() > 1) { + pagemap_->Set(span->last_page(), span); + } +} + +PageHeap::PageHeap(bool tagged) : PageHeap(Static::pagemap(), tagged) {} + +PageHeap::PageHeap(PageMap* map, bool tagged) + : PageAllocatorInterface("PageHeap", map, tagged), + scavenge_counter_(0), + // Start scavenging at kMaxPages list + release_index_(kMaxPages) { + large_.normal.Init(); + large_.returned.Init(); + for (int i = 0; i < kMaxPages; i++) { + free_[i].normal.Init(); + free_[i].returned.Init(); + } +} + +Span* PageHeap::SearchFreeAndLargeLists(Length n, bool* from_returned) { + ASSERT(Check()); + ASSERT(n > 0); + + // Find first size >= n that has a non-empty list + for (Length s = n; s < kMaxPages; s++) { + SpanList* ll = &free_[s].normal; + // If we're lucky, ll is non-empty, meaning it has a suitable span. + if (!ll->empty()) { + ASSERT(ll->first()->location() == Span::ON_NORMAL_FREELIST); + *from_returned = false; + return Carve(ll->first(), n); + } + // Alternatively, maybe there's a usable returned span. + ll = &free_[s].returned; + if (!ll->empty()) { + ASSERT(ll->first()->location() == Span::ON_RETURNED_FREELIST); + *from_returned = true; + return Carve(ll->first(), n); + } + } + // No luck in free lists, our last chance is in a larger class. + return AllocLarge(n, from_returned); // May be NULL +} + +Span* PageHeap::AllocateSpan(Length n, bool* from_returned) { + ASSERT(Check()); + Span* result = SearchFreeAndLargeLists(n, from_returned); + if (result != nullptr) return result; + + // Grow the heap and try again. 
+ if (!GrowHeap(n)) { + ASSERT(Check()); + return nullptr; + } + + result = SearchFreeAndLargeLists(n, from_returned); + // our new memory should be unbacked + ASSERT(*from_returned); + return result; +} + +Span* PageHeap::New(Length n) { + ASSERT(n > 0); + bool from_returned; + Span* result; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + result = AllocateSpan(n, &from_returned); + if (result) Static::page_allocator()->ShrinkToUsageLimit(); + if (result) info_.RecordAlloc(result->first_page(), result->num_pages()); + } + + if (result != nullptr && from_returned) { + SystemBack(result->start_address(), result->bytes_in_span()); + } + + ASSERT(!result || IsTaggedMemory(result->start_address()) == tagged_); + return result; +} + +static bool IsSpanBetter(Span* span, Span* best, Length n) { + if (span->num_pages() < n) { + return false; + } + if (best == nullptr) { + return true; + } + if (span->num_pages() < best->num_pages()) { + return true; + } + if (span->num_pages() > best->num_pages()) { + return false; + } + return span->first_page() < best->first_page(); +} + +// We could do slightly more efficient things here (we do some +// unnecessary Carves in New) but it's not anywhere +// close to a fast path, and is going to be replaced soon anyway, so +// don't bother. +Span* PageHeap::NewAligned(Length n, Length align) { + ASSERT(n > 0); + ASSERT((align & (align - 1)) == 0); + + if (align <= 1) { + return New(n); + } + + bool from_returned; + Span* span; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + Length extra = align - 1; + span = AllocateSpan(n + extra, &from_returned); + if (span == nullptr) return nullptr; + // certainly contains an appropriately aligned region; find it + // and chop off the rest. + PageID p = span->first_page(); + const PageID mask = align - 1; + PageID aligned = (p + mask) & ~mask; + ASSERT(aligned % align == 0); + ASSERT(p <= aligned); + ASSERT(aligned + n <= p + span->num_pages()); + // we have too many pages now, possible all before, possibly all + // after, maybe both + Length before = aligned - p; + Length after = extra - before; + span->set_first_page(aligned); + span->set_num_pages(n); + RecordSpan(span); + + const Span::Location loc = + from_returned ? Span::ON_RETURNED_FREELIST : Span::ON_NORMAL_FREELIST; + if (before > 0) { + Span* extra = Span::New(p, before); + extra->set_location(loc); + RecordSpan(extra); + MergeIntoFreeList(extra); + } + + if (after > 0) { + Span* extra = Span::New(aligned + n, after); + extra->set_location(loc); + RecordSpan(extra); + MergeIntoFreeList(extra); + } + + info_.RecordAlloc(aligned, n); + } + + if (span != nullptr && from_returned) { + SystemBack(span->start_address(), span->bytes_in_span()); + } + + ASSERT(!span || IsTaggedMemory(span->start_address()) == tagged_); + return span; +} + +Span* PageHeap::AllocLarge(Length n, bool* from_returned) { + // find the best span (closest to n in size). + // The following loops implements address-ordered best-fit. + Span* best = nullptr; + + // Search through normal list + for (Span* span : large_.normal) { + ASSERT(span->location() == Span::ON_NORMAL_FREELIST); + if (IsSpanBetter(span, best, n)) { + best = span; + *from_returned = false; + } + } + + // Search through released list in case it has a better fit + for (Span* span : large_.returned) { + ASSERT(span->location() == Span::ON_RETURNED_FREELIST); + if (IsSpanBetter(span, best, n)) { + best = span; + *from_returned = true; + } + } + + return best == nullptr ? 
nullptr : Carve(best, n); +} + +Span* PageHeap::Carve(Span* span, Length n) { + ASSERT(n > 0); + ASSERT(span->location() != Span::IN_USE); + const Span::Location old_location = span->location(); + RemoveFromFreeList(span); + span->set_location(Span::IN_USE); + + const int extra = span->num_pages() - n; + ASSERT(extra >= 0); + if (extra > 0) { + Span* leftover = nullptr; + // Check if this span has another span on the right but not on the left. + // There is one special case we want to handle: if heap grows down (as it is + // usually happens with mmap allocator) and user allocates lots of large + // persistent memory blocks (namely, kMinSystemAlloc + epsilon), then we + // want to return the last part of the span to user and push the beginning + // to the freelist. + // Otherwise system allocator would allocate 2 * kMinSystemAlloc, we return + // the first kMinSystemAlloc + epsilon to user and add the remaining + // kMinSystemAlloc - epsilon to the freelist. The remainder is not large + // enough to satisfy the next allocation request, so we allocate + // another 2 * kMinSystemAlloc from system and the process repeats wasting + // half of memory. + // If we return the last part to user, then the remainder will be merged + // with the next system allocation which will result in dense packing. + // There are no other known cases where span splitting strategy matters, + // so in other cases we return beginning to user. + if (pagemap_->GetDescriptor(span->first_page() - 1) == nullptr && + pagemap_->GetDescriptor(span->last_page() + 1) != nullptr) { + leftover = Span::New(span->first_page(), extra); + span->set_first_page(span->first_page() + extra); + pagemap_->Set(span->first_page(), span); + } else { + leftover = Span::New(span->first_page() + n, extra); + } + leftover->set_location(old_location); + RecordSpan(leftover); + PrependToFreeList(leftover); // Skip coalescing - no candidates possible + leftover->set_freelist_added_time(span->freelist_added_time()); + span->set_num_pages(n); + pagemap_->Set(span->last_page(), span); + } + ASSERT(Check()); + return span; +} + +void PageHeap::Delete(Span* span) { + ASSERT(IsTaggedMemory(span->start_address()) == tagged_); + info_.RecordFree(span->first_page(), span->num_pages()); + ASSERT(Check()); + ASSERT(span->location() == Span::IN_USE); + ASSERT(!span->sampled()); + ASSERT(span->num_pages() > 0); + ASSERT(pagemap_->GetDescriptor(span->first_page()) == span); + ASSERT(pagemap_->GetDescriptor(span->last_page()) == span); + const Length n = span->num_pages(); + span->set_location(Span::ON_NORMAL_FREELIST); + MergeIntoFreeList(span); // Coalesces if possible + IncrementalScavenge(n); + ASSERT(Check()); +} + +void PageHeap::MergeIntoFreeList(Span* span) { + ASSERT(span->location() != Span::IN_USE); + span->set_freelist_added_time(absl::base_internal::CycleClock::Now()); + + // Coalesce -- we guarantee that "p" != 0, so no bounds checking + // necessary. We do not bother resetting the stale pagemap + // entries for the pieces we are merging together because we only + // care about the pagemap entries for the boundaries. + // + // Note that only similar spans are merged together. For example, + // we do not coalesce "returned" spans with "normal" spans. 
+ const PageID p = span->first_page(); + const Length n = span->num_pages(); + Span* prev = pagemap_->GetDescriptor(p - 1); + if (prev != nullptr && prev->location() == span->location()) { + // Merge preceding span into this span + ASSERT(prev->last_page() + 1 == p); + const Length len = prev->num_pages(); + span->AverageFreelistAddedTime(prev); + RemoveFromFreeList(prev); + Span::Delete(prev); + span->set_first_page(span->first_page() - len); + span->set_num_pages(span->num_pages() + len); + pagemap_->Set(span->first_page(), span); + } + Span* next = pagemap_->GetDescriptor(p + n); + if (next != nullptr && next->location() == span->location()) { + // Merge next span into this span + ASSERT(next->first_page() == p + n); + const Length len = next->num_pages(); + span->AverageFreelistAddedTime(next); + RemoveFromFreeList(next); + Span::Delete(next); + span->set_num_pages(span->num_pages() + len); + pagemap_->Set(span->last_page(), span); + } + + PrependToFreeList(span); +} + +void PageHeap::PrependToFreeList(Span* span) { + ASSERT(span->location() != Span::IN_USE); + SpanListPair* list = + (span->num_pages() < kMaxPages) ? &free_[span->num_pages()] : &large_; + if (span->location() == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes += span->bytes_in_span(); + list->normal.prepend(span); + } else { + stats_.unmapped_bytes += span->bytes_in_span(); + list->returned.prepend(span); + } +} + +void PageHeap::RemoveFromFreeList(Span* span) { + ASSERT(span->location() != Span::IN_USE); + if (span->location() == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes -= span->bytes_in_span(); + } else { + stats_.unmapped_bytes -= span->bytes_in_span(); + } + span->RemoveFromList(); +} + +void PageHeap::IncrementalScavenge(Length n) { +} + +Length PageHeap::ReleaseLastNormalSpan(SpanListPair* slist) { + Span* s = slist->normal.last(); + ASSERT(s->location() == Span::ON_NORMAL_FREELIST); + RemoveFromFreeList(s); + + // We're dropping very important and otherwise contended pageheap_lock around + // call to potentially very slow syscall to release pages. Those syscalls can + // be slow even with "advanced" things such as MADV_FREE{,ABLE} because they + // have to walk actual page tables, and we sometimes deal with large spans, + // which sometimes takes lots of time. Plus Linux grabs per-address space + // mm_sem lock which could be extremely contended at times. So it is best if + // we avoid holding one contended lock while waiting for another. + // + // Note, we set span location to in-use, because our span could be found via + // pagemap in e.g. MergeIntoFreeList while we're not holding the lock. By + // marking it in-use we prevent this possibility. So span is removed from free + // list and marked "unmergable" and that guarantees safety during unlock-ful + // release. + // + // Taking the span off the free list will make our stats reporting wrong if + // another thread happens to try to measure memory usage during the release, + // so we fix up the stats during the unlocked period. + stats_.free_bytes += s->bytes_in_span(); + s->set_location(Span::IN_USE); + pageheap_lock.Unlock(); + + const Length n = s->num_pages(); + SystemRelease(s->start_address(), s->bytes_in_span()); + + pageheap_lock.Lock(); + stats_.free_bytes -= s->bytes_in_span(); + s->set_location(Span::ON_RETURNED_FREELIST); + MergeIntoFreeList(s); // Coalesces if possible. 
+ return n; +} + +Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { + Length released_pages = 0; + Length prev_released_pages = -1; + + // Round robin through the lists of free spans, releasing the last + // span in each list. Stop after releasing at least num_pages. + while (released_pages < num_pages) { + if (released_pages == prev_released_pages) { + // Last iteration of while loop made no progress. + break; + } + prev_released_pages = released_pages; + + for (int i = 0; i < kMaxPages+1 && released_pages < num_pages; + i++, release_index_++) { + if (release_index_ > kMaxPages) release_index_ = 0; + SpanListPair* slist = + (release_index_ == kMaxPages) ? &large_ : &free_[release_index_]; + if (!slist->normal.empty()) { + Length released_len = ReleaseLastNormalSpan(slist); + released_pages += released_len; + } + } + } + info_.RecordRelease(num_pages, released_pages); + return released_pages; +} + +void PageHeap::GetSmallSpanStats(SmallSpanStats* result) { + for (int s = 0; s < kMaxPages; s++) { + result->normal_length[s] = free_[s].normal.length(); + result->returned_length[s] = free_[s].returned.length(); + } +} + +void PageHeap::GetLargeSpanStats(LargeSpanStats* result) { + result->spans = 0; + result->normal_pages = 0; + result->returned_pages = 0; + for (Span* s : large_.normal) { + result->normal_pages += s->num_pages(); + result->spans++; + } + for (Span* s : large_.returned) { + result->returned_pages += s->num_pages(); + result->spans++; + } +} + +bool PageHeap::GrowHeap(Length n) { + if (n > kMaxValidPages) return false; + size_t actual_size; + void* ptr = SystemAlloc(n << kPageShift, &actual_size, kPageSize, tagged_); + if (ptr == nullptr) return false; + n = actual_size >> kPageShift; + + stats_.system_bytes += actual_size; + const PageID p = reinterpret_cast(ptr) >> kPageShift; + ASSERT(p > 0); + + // If we have already a lot of pages allocated, just pre allocate a bunch of + // memory for the page map. This prevents fragmentation by pagemap metadata + // when a program keeps allocating and freeing large blocks. + + // Make sure pagemap has entries for all of the new pages. + // Plus ensure one before and one after so coalescing code + // does not need bounds-checking. + if (pagemap_->Ensure(p - 1, n + 2)) { + // Pretend the new area is allocated and then return it to cause + // any necessary coalescing to occur. + Span* span = Span::New(p, n); + RecordSpan(span); + span->set_location(Span::ON_RETURNED_FREELIST); + MergeIntoFreeList(span); + ASSERT(Check()); + return true; + } else { + // We could not allocate memory within the pagemap. + // Note the following leaks virtual memory, but at least it gets rid of + // the underlying physical memory. 
+ SystemRelease(ptr, actual_size); + return false; + } +} + +bool PageHeap::Check() { + ASSERT(free_[0].normal.empty()); + ASSERT(free_[0].returned.empty()); + return true; +} + +void PageHeap::PrintInPbtxt(PbtxtRegion* region) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + SmallSpanStats small; + GetSmallSpanStats(&small); + LargeSpanStats large; + GetLargeSpanStats(&large); + + struct Helper { + static void RecordAges(PageAgeHistograms* ages, const SpanListPair& pair) { + for (const Span* s : pair.normal) { + ages->RecordRange(s->num_pages(), false, s->freelist_added_time()); + } + + for (const Span* s : pair.returned) { + ages->RecordRange(s->num_pages(), true, s->freelist_added_time()); + } + } + }; + + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + for (int s = 0; s < kMaxPages; ++s) { + Helper::RecordAges(&ages, free_[s]); + } + Helper::RecordAges(&ages, large_); + PrintStatsInPbtxt(region, small, large, ages); + // We do not collect info_.PrintInPbtxt for now. +} + +void PageHeap::Print(TCMalloc_Printer* out) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + SmallSpanStats small; + GetSmallSpanStats(&small); + LargeSpanStats large; + GetLargeSpanStats(&large); + PrintStats("PageHeap", out, stats_, small, large, true); + + struct Helper { + static void RecordAges(PageAgeHistograms* ages, const SpanListPair& pair) { + for (const Span* s : pair.normal) { + ages->RecordRange(s->num_pages(), false, s->freelist_added_time()); + } + + for (const Span* s : pair.returned) { + ages->RecordRange(s->num_pages(), true, s->freelist_added_time()); + } + } + }; + + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + for (int s = 0; s < kMaxPages; ++s) { + Helper::RecordAges(&ages, free_[s]); + } + Helper::RecordAges(&ages, large_); + ages.Print("PageHeap", out); + + info_.Print(out); +} + +} // namespace tcmalloc diff --git a/tcmalloc/page_heap.h b/tcmalloc/page_heap.h new file mode 100644 index 000000000..19da71d1b --- /dev/null +++ b/tcmalloc/page_heap.h @@ -0,0 +1,183 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PAGE_HEAP_H_ +#define TCMALLOC_PAGE_HEAP_H_ + +#include + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/page_allocator_interface.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { + +// ------------------------------------------------------------------------- +// Page-level allocator +// * Eager coalescing +// +// Heap for page-level allocation. We allow allocating and freeing a +// contiguous runs of pages (called a "span"). +// ------------------------------------------------------------------------- + +class PageHeap : public PageAllocatorInterface { + public: + explicit PageHeap(bool tagged); + // for testing + PageHeap(PageMap* map, bool tagged); + + // Allocate a run of "n" pages. Returns zero if out of memory. 
+ // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + // The returned memory is backed. + Span* New(Length n) LOCKS_EXCLUDED(pageheap_lock) override; + + // As New, but the returned span is aligned to a -page boundary. + // must be a power of two. + Span* NewAligned(Length n, Length align) + LOCKS_EXCLUDED(pageheap_lock) override; + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. + void Delete(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + inline BackingStats stats() const + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override { + return stats_; + } + + void GetSmallSpanStats(SmallSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + void GetLargeSpanStats(LargeSpanStats* result) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + // Prints stats about the page heap to *out. + void Print(TCMalloc_Printer* out) LOCKS_EXCLUDED(pageheap_lock) override; + + void PrintInPbtxt(PbtxtRegion* region) LOCKS_EXCLUDED(pageheap_lock) override; + + private: + // Never delay scavenging for more than the following number of + // deallocated pages. With 8K pages, this comes to 8GiB of + // deallocation. + static const int kMaxReleaseDelay = 1 << 20; + + // If there is nothing to release, wait for so many pages before + // scavenging again. With 8K pages, this comes to 2GiB of memory. + static const int kDefaultReleaseDelay = 1 << 18; + + // We segregate spans of a given size into two circular linked + // lists: one for normal spans, and one for spans whose memory + // has been returned to the system. + struct SpanListPair { + SpanList normal; + SpanList returned; + }; + + // List of free spans of length >= kMaxPages + SpanListPair large_ GUARDED_BY(pageheap_lock); + + // Array mapping from span length to a doubly linked list of free spans + SpanListPair free_[kMaxPages] GUARDED_BY(pageheap_lock); + + // Statistics on system, free, and unmapped bytes + BackingStats stats_ GUARDED_BY(pageheap_lock); + + Span* SearchFreeAndLargeLists(Length n, bool* from_returned) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + bool GrowHeap(Length n) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // REQUIRES: span->length >= n + // REQUIRES: span->location != IN_USE + // Remove span from its free list, and move any leftover part of + // span into appropriate free lists. Also update "span" to have + // length exactly "n" and mark it as non-free so it can be returned + // to the client. After all that, decrease free_pages_ by n and + // return span. + Span* Carve(Span* span, Length n) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Allocate a large span of length == n. If successful, returns a + // span of exactly the specified length. Else, returns NULL. + Span* AllocLarge(Length n, bool* from_returned) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Coalesce span with neighboring spans if possible, prepend to + // appropriate free list, and adjust stats. 
+ void MergeIntoFreeList(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Prepends span to appropriate free list, and adjusts stats. You'll probably + // want to adjust span->freelist_added_time before/after calling this + // function. + void PrependToFreeList(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Removes span from its free list, and adjust stats. + void RemoveFromFreeList(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Incrementally release some memory to the system. + // IncrementalScavenge(n) is called whenever n pages are freed. + void IncrementalScavenge(Length n) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Release the last span on the normal portion of this list. + // Return the length of that span. + Length ReleaseLastNormalSpan(SpanListPair* slist) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Prints stats for the given list of Spans. + // - span_type: a short, human-readable string describing the spans + // (typically "live" or "unmapped"). + // - span_size: the number of pages in each of the given spans. For large + // spans (which are of various sizes), pass kMaxPages. + // - span_size_prefix: We print this string immediately to the left of the + // span size. This is useful for large spans (e.g. you might pass ">="). + void DumpSubSpanStats(TCMalloc_Printer* out, const SpanList* spans, + const char* span_type, int span_size, + uint64_t cycle_now, double cycle_clock_freq, + const char* span_size_prefix); + // Prints summary stats for all live ("normal") or unmapped ("returned") + // pages. + void DumpSpanTotalStats(TCMalloc_Printer* out, uint64_t cycle_now, + double cycle_clock_freq, bool live_spans); + + // Do invariant testing. + bool Check() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Number of pages to deallocate before doing more scavenging + int64_t scavenge_counter_ GUARDED_BY(pageheap_lock); + + // Index of last free list where we released memory to the OS. + int release_index_ GUARDED_BY(pageheap_lock); + + Span* AllocateSpan(Length n, bool* from_returned) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void RecordSpan(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/tcmalloc/page_heap_allocator.h b/tcmalloc/page_heap_allocator.h new file mode 100644 index 000000000..7aef0c087 --- /dev/null +++ b/tcmalloc/page_heap_allocator.h @@ -0,0 +1,85 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ +#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ + +#include + +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +struct AllocatorStats { + // Number of allocated but unfreed objects + size_t in_use; + // Number of objects created (both free and allocated) + size_t total; +}; + +// Simple allocator for objects of a specified type. External locking +// is required before accessing one of these objects. +template +class PageHeapAllocator { + public: + // We use an explicit Init function because these variables are statically + // allocated and their constructors might not have run by the time some + // other static variable tries to allocate memory. + void Init(Arena* arena) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + arena_ = arena; + stats_ = {0, 0}; + free_list_ = nullptr; + // Reserve some space at the beginning to avoid fragmentation. + Delete(New()); + } + + T* New() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + // Consult free list + T* result = free_list_; + stats_.in_use++; + if (ABSL_PREDICT_FALSE(result == nullptr)) { + stats_.total++; + return reinterpret_cast(arena_->Alloc(sizeof(T))); + } + free_list_ = *(reinterpret_cast(free_list_)); + return result; + } + + void Delete(T* p) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + *(reinterpret_cast(p)) = free_list_; + free_list_ = p; + stats_.in_use--; + } + + AllocatorStats stats() const EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return stats_; + } + + private: + // Arena from which to allocate memory + Arena* arena_; + + // Free list of already carved objects + T* free_list_ GUARDED_BY(pageheap_lock); + + AllocatorStats stats_ GUARDED_BY(pageheap_lock); +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ diff --git a/tcmalloc/page_heap_test.cc b/tcmalloc/page_heap_test.cc new file mode 100644 index 000000000..d203ad5e2 --- /dev/null +++ b/tcmalloc/page_heap_test.cc @@ -0,0 +1,110 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/page_heap.h" + +#include +#include + +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/memory/memory.h" +#include "tcmalloc/common.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace { + +// PageHeap expands by kMinSystemAlloc by default, so use this as the minimum +// Span length to not get more memory than expected. 
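+// (For instance, if kMinSystemAlloc were 2MiB with 8KiB pages, kMinSpanLength
+// would be 256 pages; the exact values depend on the build configuration.)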
+static const size_t kMinSpanLength = kMinSystemAlloc >> kPageShift; + +static void CheckStats(const tcmalloc::PageHeap* ph, uint64_t system_pages, + uint64_t free_pages, uint64_t unmapped_pages) + LOCKS_EXCLUDED(tcmalloc::pageheap_lock) { + tcmalloc::BackingStats stats; + { + absl::base_internal::SpinLockHolder h(&tcmalloc::pageheap_lock); + stats = ph->stats(); + } + + ASSERT_EQ(system_pages, stats.system_bytes >> kPageShift); + ASSERT_EQ(free_pages, stats.free_bytes >> kPageShift); + ASSERT_EQ(unmapped_pages, stats.unmapped_bytes >> kPageShift); +} + +static void Delete(tcmalloc::PageHeap* ph, tcmalloc::Span* s) + LOCKS_EXCLUDED(tcmalloc::pageheap_lock) { + { + absl::base_internal::SpinLockHolder h(&tcmalloc::pageheap_lock); + ph->Delete(s); + } +} + +static Length Release(tcmalloc::PageHeap* ph, Length n) { + absl::base_internal::SpinLockHolder h(&tcmalloc::pageheap_lock); + return ph->ReleaseAtLeastNPages(n); +} + +class PageHeapTest : public ::testing::Test { + public: + PageHeapTest() { + // If this test is not linked against TCMalloc, the global arena used for + // metadata will not be initialized. + Static::InitIfNecessary(); + } +}; + +// TODO(b/36484267): replace this test wholesale. +TEST_F(PageHeapTest, Stats) { + auto pagemap = absl::make_unique(); + void* memory = calloc(1, sizeof(tcmalloc::PageHeap)); + tcmalloc::PageHeap* ph = new (memory) tcmalloc::PageHeap(pagemap.get(), + /*tagged=*/false); + + // Empty page heap + CheckStats(ph, 0, 0, 0); + + // Allocate a span 's1' + tcmalloc::Span* s1 = ph->New(kMinSpanLength); + CheckStats(ph, kMinSpanLength, 0, 0); + + // Allocate an aligned span 's2' + static const Length kHalf = kMinSpanLength / 2; + tcmalloc::Span* s2 = ph->NewAligned(kHalf, kHalf); + ASSERT_EQ(s2->first_page() % kHalf, 0); + CheckStats(ph, kMinSpanLength * 2, 0, kHalf); + + // Delete the old one + Delete(ph, s1); + CheckStats(ph, kMinSpanLength * 2, kMinSpanLength, kHalf); + + // Release the space from there: + Length released = Release(ph, 1); + ASSERT_EQ(released, kMinSpanLength); + CheckStats(ph, kMinSpanLength * 2, 0, kHalf + kMinSpanLength); + + // and delete the new one + Delete(ph, s2); + CheckStats(ph, kMinSpanLength * 2, kHalf, kHalf + kMinSpanLength); + + free(memory); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/pagemap.cc b/tcmalloc/pagemap.cc new file mode 100644 index 000000000..1b41772ee --- /dev/null +++ b/tcmalloc/pagemap.cc @@ -0,0 +1,69 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/pagemap.h" + +#include + +#include "tcmalloc/common.h" +#include "tcmalloc/span.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +void PageMap::RegisterSizeClass(Span* span, size_t sc) { + ASSERT(span->location() == Span::IN_USE); + const PageID first = span->first_page(); + const PageID last = span->last_page(); + ASSERT(GetDescriptor(first) == span); + for (PageID p = first; p <= last; ++p) { + map_.set_with_sizeclass(p, span, sc); + } +} + +void PageMap::UnregisterSizeClass(Span* span) { + ASSERT(span->location() == Span::IN_USE); + const PageID first = span->first_page(); + const PageID last = span->last_page(); + ASSERT(GetDescriptor(first) == span); + for (PageID p = first; p <= last; ++p) { + map_.clear_sizeclass(p); + } +} + +void PageMap::MapRootWithSmallPages() { + constexpr size_t kHugePageMask = ~(kHugePageSize - 1); + uintptr_t begin = reinterpret_cast(map_.RootAddress()); + // Round begin up to the nearest hugepage, this avoids causing memory before + // the start of the pagemap to become mapped onto small pages. + uintptr_t rbegin = (begin + kHugePageSize) & kHugePageMask; + size_t length = map_.RootSize(); + // Round end down to the nearest hugepage, this avoids causing memory after + // the end of the pagemap becoming mapped onto small pages. + size_t rend = (begin + length) & kHugePageMask; + // Since we have rounded the start up, and the end down, we also want to + // confirm that there is something left between them for us to modify. + // For small but slow, the root pagemap is less than a hugepage in size, + // so we will not end up forcing it to be small pages. + if (rend > rbegin) { + size_t rlength = rend - rbegin; + madvise(reinterpret_cast(rbegin), rlength, MADV_NOHUGEPAGE); + } +} + +void* MetaDataAlloc(size_t bytes) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return Static::arena()->Alloc(bytes); +} + +} // namespace tcmalloc diff --git a/tcmalloc/pagemap.h b/tcmalloc/pagemap.h new file mode 100644 index 000000000..4d0456db6 --- /dev/null +++ b/tcmalloc/pagemap.h @@ -0,0 +1,424 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A data structure used by the caching malloc. It maps from page# to +// a pointer that contains info about that page using a two-level array. +// +// The BITS parameter should be the number of bits required to hold +// a page number. E.g., with 32 bit pointers and 8K pages (i.e., +// page offset fits in lower 13 bits), BITS == 19. +// +// A PageMap requires external synchronization, except for the get/sizeclass +// methods (see explanation at top of tcmalloc.cc). 
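+// For example, a 48-bit virtual address space with 8KiB pages would use
+// BITS == 48 - 13 == 35; the pagemap unit test instantiates PageMap2<20, ...>
+// to cover a small 2^20-page range.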
+
+#ifndef TCMALLOC_PAGEMAP_H_
+#define TCMALLOC_PAGEMAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "absl/base/attributes.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/static_vars.h"
+
+namespace tcmalloc {
+
+// Two-level radix tree
+typedef void* (*PagemapAllocator)(size_t);
+void* MetaDataAlloc(size_t bytes);
+
+template <int BITS, PagemapAllocator Allocator>
+class PageMap2 {
+ private:
+  // The leaf node (regardless of pointer size) always maps 2^15 entries;
+  // with 8K pages, this gives us 256MB mapped per leaf node.
+  static const int kLeafBits = 15;
+  static const int kLeafLength = 1 << kLeafBits;
+  static const int kRootBits = (BITS >= kLeafBits) ? (BITS - kLeafBits) : 0;
+  // (1<<kRootBits) must not overflow an "int"
+  static_assert(kRootBits < sizeof(int) * 8 - 1, "kRootBits is too large");
+  static const int kRootLength = 1 << kRootBits;
+
+  static const size_t kLeafCoveredBytes = 1ul << (kLeafBits + kPageShift);
+  static_assert(kLeafCoveredBytes >= kHugePageSize, "leaf too small");
+  static const size_t kLeafHugeBits = (kLeafBits + kPageShift - kHugePageShift);
+  static const size_t kLeafHugepages = kLeafCoveredBytes / kHugePageSize;
+  static_assert(kLeafHugepages == 1 << kLeafHugeBits, "sanity");
+  struct Leaf {
+    // We keep parallel arrays indexed by page number.  One keeps the
+    // size class; another span pointers; the last hugepage-related
+    // information.  The size class information is kept segregated
+    // since small object deallocations are so frequent and do not
+    // need the other information kept in a Span.
+    uint8_t sizeclass[kLeafLength];
+    void* span[kLeafLength];
+    void* hugepage[kLeafHugepages];
+  };
+
+  Leaf* root_[kRootLength];  // Top-level node
+  size_t bytes_used_;
+
+ public:
+  typedef uintptr_t Number;
+
+  constexpr PageMap2() : root_{}, bytes_used_(0) {}
+
+  // No locks required.  See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+  void* get(Number k) const NO_THREAD_SAFETY_ANALYSIS {
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    if ((k >> BITS) > 0 || root_[i1] == nullptr) {
+      return nullptr;
+    }
+    return root_[i1]->span[i2];
+  }
+
+  // No locks required.  See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+  // Requires that the span is known to already exist.
+  void* get_existing(Number k) const NO_THREAD_SAFETY_ANALYSIS {
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    ASSERT((k >> BITS) == 0);
+    ASSERT(root_[i1] != nullptr);
+    return root_[i1]->span[i2];
+  }
+
+  // No locks required.  See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+  // REQUIRES: Must be a valid page number previously Ensure()d.
+  uint8_t ABSL_ATTRIBUTE_ALWAYS_INLINE
+  sizeclass(Number k) const NO_THREAD_SAFETY_ANALYSIS {
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    ASSERT((k >> BITS) == 0);
+    ASSERT(root_[i1] != nullptr);
+    return root_[i1]->sizeclass[i2];
+  }
+
+  void set(Number k, void* v) {
+    ASSERT(k >> BITS == 0);
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    root_[i1]->span[i2] = v;
+  }
+
+  void set_with_sizeclass(Number k, void* v, uint8_t sc) {
+    ASSERT(k >> BITS == 0);
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    Leaf* leaf = root_[i1];
+    leaf->span[i2] = v;
+    leaf->sizeclass[i2] = sc;
+  }
+
+  void clear_sizeclass(Number k) {
+    ASSERT(k >> BITS == 0);
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    root_[i1]->sizeclass[i2] = 0;
+  }
+
+  void* get_hugepage(Number k) {
+    ASSERT(k >> BITS == 0);
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    if ((k >> BITS) > 0 || root_[i1] == nullptr) {
+      return nullptr;
+    }
+    return root_[i1]->hugepage[i2 >> (kLeafBits - kLeafHugeBits)];
+  }
+
+  void set_hugepage(Number k, void* v) {
+    ASSERT(k >> BITS == 0);
+    const Number i1 = k >> kLeafBits;
+    const Number i2 = k & (kLeafLength - 1);
+    root_[i1]->hugepage[i2 >> (kLeafBits - kLeafHugeBits)] = v;
+  }
+
+  bool Ensure(Number start, size_t n) {
+    ASSERT(n > 0);
+    for (Number key = start; key <= start + n - 1; ) {
+      const Number i1 = key >> kLeafBits;
+
+      // Check for overflow
+      if (i1 >= kRootLength) return false;
+
+      // Make 2nd level node if necessary
+      if (root_[i1] == nullptr) {
+        Leaf* leaf = reinterpret_cast<Leaf*>(Allocator(sizeof(Leaf)));
+        if (leaf == nullptr) return false;
+        bytes_used_ += sizeof(Leaf);
+        memset(leaf, 0, sizeof(*leaf));
+        root_[i1] = leaf;
+      }
+
+      // Advance key past whatever is covered by this leaf node
+      key = ((key >> kLeafBits) + 1) << kLeafBits;
+    }
+    return true;
+  }
+
+  size_t bytes_used() const {
+    // Account for size of root node, etc.
+    return bytes_used_ + sizeof(*this);
+  }
+
+  constexpr size_t RootSize() const { return sizeof(root_); }
+  const void* RootAddress() { return root_; }
+};
+
+// Three-level radix tree
+// Currently only used for TCMALLOC_SMALL_BUT_SLOW
+template <int BITS, PagemapAllocator Allocator>
+class PageMap3 {
+ private:
+  // For x86 we currently have 48 usable bits, for POWER we have 46. With
+  // 4KiB page sizes (12 bits) we end up with 36 bits for x86 and 34 bits
+  // for POWER. So leaf covers 4KiB * 1 << 12 = 16MiB - which is huge page
+  // size for POWER.
+  static const int kLeafBits = (BITS + 2) / 3;  // Round up
+  static const int kLeafLength = 1 << kLeafBits;
+  static const int kMidBits = (BITS + 2) / 3;  // Round up
+  static const int kMidLength = 1 << kMidBits;
+  static const int kRootBits = BITS - kLeafBits - kMidBits;
+  static_assert(kRootBits > 0, "Too many bits assigned to leaf and mid");
+  // (1<<kRootBits) must not overflow an "int"
+  static_assert(kRootBits < sizeof(int) * 8 - 1, "kRootBits is too large");
+  static const int kRootLength = 1 << kRootBits;
+
+  static const size_t kLeafCoveredBytes = 1ul << (kLeafBits + kPageShift);
+  static_assert(kLeafCoveredBytes >= kHugePageSize, "leaf too small");
+  static const size_t kLeafHugeBits = (kLeafBits + kPageShift - kHugePageShift);
+  static const size_t kLeafHugepages = kLeafCoveredBytes / kHugePageSize;
+  static_assert(kLeafHugepages == 1 << kLeafHugeBits, "sanity");
+  struct Leaf {
+    // We keep parallel arrays indexed by page number.  One keeps the
+    // size class; another span pointers; the last hugepage-related
+    // information.  The size class information is kept segregated
+    // since small object deallocations are so frequent and do not
+    // need the other information kept in a Span.
+ uint8_t sizeclass[kLeafLength]; + void* span[kLeafLength]; + void* hugepage[kLeafHugepages]; + }; + + struct Node { + // Mid-level structure that holds pointers to leafs + Leaf* leafs[kMidLength]; + }; + + Node* root_[kRootLength]; // Top-level node + size_t bytes_used_; + + public: + typedef uintptr_t Number; + + constexpr PageMap3() : root_{}, bytes_used_(0) {} + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + void* get(Number k) const NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + if ((k >> BITS) > 0 || root_[i1] == nullptr || + root_[i1]->leafs[i2] == nullptr) { + return nullptr; + } + return root_[i1]->leafs[i2]->span[i3]; + } + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + // Requires that the span is known to already exist. + void* get_existing(Number k) const NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + ASSERT((k >> BITS) == 0); + ASSERT(root_[i1] != nullptr); + ASSERT(root_[i1]->leafs[i2] != nullptr); + return root_[i1]->leafs[i2]->span[i3]; + } + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + // REQUIRES: Must be a valid page number previously Ensure()d. + uint8_t ABSL_ATTRIBUTE_ALWAYS_INLINE + sizeclass(Number k) const NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + ASSERT((k >> BITS) == 0); + ASSERT(root_[i1] != nullptr); + ASSERT(root_[i1]->leafs[i2] != nullptr); + return root_[i1]->leafs[i2]->sizeclass[i3]; + } + + void set(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + root_[i1]->leafs[i2]->span[i3] = v; + } + + void set_with_sizeclass(Number k, void* v, uint8_t sc) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + Leaf* leaf = root_[i1]->leafs[i2]; + leaf->span[i3] = v; + leaf->sizeclass[i3] = sc; + } + + void clear_sizeclass(Number k) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + root_[i1]->leafs[i2]->sizeclass[i3] = 0; + } + + void* get_hugepage(Number k) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + if ((k >> BITS) > 0 || root_[i1] == nullptr || + root_[i1]->leafs[i2] == nullptr) { + return nullptr; + } + return root_[i1]->leafs[i2]->hugepage[i3 >> (kLeafBits - kLeafHugeBits)]; + } + + void set_hugepage(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + root_[i1]->leafs[i2]->hugepage[i3 >> (kLeafBits - kLeafHugeBits)] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1;) { + const Number i1 = key >> (kLeafBits + kMidBits); + const Number i2 = (key >> 
kLeafBits) & (kMidLength - 1); + + // Check within root + if (i1 >= kRootLength) return false; + + // Allocate Node if necessary + if (root_[i1] == nullptr) { + Node* node = reinterpret_cast(Allocator(sizeof(Node))); + if (node == nullptr) return false; + bytes_used_ += sizeof(Node); + memset(node, 0, sizeof(*node)); + root_[i1] = node; + } + + // Allocate Leaf if necessary + if (root_[i1]->leafs[i2] == nullptr) { + Leaf* leaf = reinterpret_cast(Allocator(sizeof(Leaf))); + if (leaf == nullptr) return false; + bytes_used_ += sizeof(Leaf); + memset(leaf, 0, sizeof(*leaf)); + root_[i1]->leafs[i2] = leaf; + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> kLeafBits) + 1) << kLeafBits; + } + return true; + } + + size_t bytes_used() const { return bytes_used_ + sizeof(*this); } + + constexpr size_t RootSize() const { return sizeof(root_); } + const void* RootAddress() { return root_; } +}; + +class PageMap { + public: + constexpr PageMap() : map_{} {} + + // Return the size class for p, or 0 if it is not known to tcmalloc + // or is a page containing large objects. + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + uint8_t sizeclass(PageID p) NO_THREAD_SAFETY_ANALYSIS { + return map_.sizeclass(p); + } + + void Set(PageID p, Span* span) { map_.set(p, span); } + + bool Ensure(PageID p, size_t n) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return map_.Ensure(p, n); + } + + // Mark an allocated span as being used for small objects of the + // specified size-class. + // REQUIRES: span was returned by an earlier call to PageAllocator::New() + // and has not yet been deleted. + // Concurrent calls to this method are safe unless they mark the same span. + void RegisterSizeClass(Span* span, size_t sc); + + // Mark an allocated span as being not used for any size-class. + // REQUIRES: span was returned by an earlier call to PageAllocator::New() + // and has not yet been deleted. + // Concurrent calls to this method are safe unless they mark the same span. + void UnregisterSizeClass(Span* span); + + // Return the descriptor for the specified page. Returns NULL if + // this PageID was not allocated previously. + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + inline Span* GetDescriptor(PageID p) const NO_THREAD_SAFETY_ANALYSIS { + return reinterpret_cast(map_.get(p)); + } + + // Return the descriptor for the specified page. + // PageID must have been previously allocated. + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + ABSL_ATTRIBUTE_RETURNS_NONNULL inline Span* GetExistingDescriptor( + PageID p) const NO_THREAD_SAFETY_ANALYSIS { + Span* span = reinterpret_cast(map_.get_existing(p)); + ASSERT(span != nullptr); + return span; + } + + size_t bytes() const EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return map_.bytes_used(); + } + + void* GetHugepage(PageID p) { return map_.get_hugepage(p); } + + void SetHugepage(PageID p, void* v) { map_.set_hugepage(p, v); } + + // The PageMap root node can be quite large and sparsely used. If this + // gets mapped with hugepages we potentially end up holding a large + // amount of unused memory. So it is better to map the root node with + // small pages to minimise the amount of unused memory. 
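+  // (See MapRootWithSmallPages() in pagemap.cc: it rounds the start of the
+  // root node up and the end down to hugepage boundaries and calls
+  // madvise(MADV_NOHUGEPAGE) on whatever remains in between.)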
+ void MapRootWithSmallPages(); + + private: +#ifdef TCMALLOC_USE_PAGEMAP3 + PageMap3 map_; +#else + PageMap2 map_; +#endif +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGEMAP_H_ diff --git a/tcmalloc/pagemap_unittest.cc b/tcmalloc/pagemap_unittest.cc new file mode 100644 index 000000000..f4f7723e4 --- /dev/null +++ b/tcmalloc/pagemap_unittest.cc @@ -0,0 +1,168 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/pagemap.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "tcmalloc/common.h" + +// Note: we leak memory every time a map is constructed, so do not +// create too many maps. + +namespace tcmalloc { +namespace { + +// Pick span pointer to use for page numbered i +void* span(intptr_t i) { + return reinterpret_cast(i+1); +} + +// Pick sizeclass to use for page numbered i +uint8_t sc(intptr_t i) { + return i % 16; +} + +class PageMapTest : public ::testing::TestWithParam { + public: + PageMapTest() { + // Arrange to pass zero-filled memory as the backing store for map. + memset(storage, 0, sizeof(Map)); + map = new (storage) Map(); + } + + ~PageMapTest() { + for (void* ptr : *ptrs()) { + ::operator delete(ptr); + } + ptrs()->clear(); + } + + private: + static std::vector* ptrs() { + static std::vector* ret = new std::vector(); + return ret; + } + + static void* alloc(size_t n) { + void* ptr = ::operator new(n); + ptrs()->push_back(ptr); + return ptr; + } + + public: + using Map = tcmalloc::PageMap2<20, alloc>; + Map* map; + + private: + alignas(Map) char storage[sizeof(Map)]; +}; + +TEST_P(PageMapTest, Sequential) { + const intptr_t limit = GetParam(); + + for (intptr_t i = 0; i < limit; i++) { + map->Ensure(i, 1); + map->set(i, span(i)); + ASSERT_EQ(map->get(i), span(i)); + + // Test size class handling + ASSERT_EQ(0, map->sizeclass(i)); + map->set_with_sizeclass(i, span(i), sc(i)); + ASSERT_EQ(sc(i), map->sizeclass(i)); + } + for (intptr_t i = 0; i < limit; i++) { + ASSERT_EQ(map->get(i), span(i)); + } +} + +TEST_P(PageMapTest, Bulk) { + const intptr_t limit = GetParam(); + + map->Ensure(0, limit); + for (intptr_t i = 0; i < limit; i++) { + map->set(i, span(i)); + ASSERT_EQ(map->get(i), span(i)); + } + for (intptr_t i = 0; i < limit; i++) { + ASSERT_EQ(map->get(i), span(i)); + } +} + +TEST_P(PageMapTest, Overflow) { + const intptr_t kLimit = 1 << 20; + ASSERT_FALSE(map->Ensure(kLimit, kLimit + 1)); +} + +TEST_P(PageMapTest, RandomAccess) { + const intptr_t limit = GetParam(); + + std::vector elements; + for (intptr_t i = 0; i < limit; i++) { + elements.push_back(i); + } + std::shuffle(elements.begin(), elements.end(), absl::BitGen()); + + for (intptr_t i = 0; i < limit; i++) { + map->Ensure(elements[i], 1); + map->set(elements[i], span(elements[i])); + ASSERT_EQ(map->get(elements[i]), span(elements[i])); + } + for (intptr_t i = 0; i < limit; i++) { + ASSERT_EQ(map->get(i), span(i)); + } +} + 
+INSTANTIATE_TEST_SUITE_P(Limits, PageMapTest, ::testing::Values(100, 1 << 20)); + +// Surround pagemap with unused memory. This isolates it so that it does not +// share pages with any other structures. This avoids the risk that adjacent +// objects might cause it to be mapped in. The padding is of sufficient size +// that this is true even if this structure is mapped with huge pages. +static struct PaddedPageMap { + constexpr PaddedPageMap() : padding_before{}, pagemap{}, padding_after{} {} + uint64_t padding_before[tcmalloc::kHugePageSize / sizeof(uint64_t)]; + tcmalloc::PageMap pagemap; + uint64_t padding_after[tcmalloc::kHugePageSize / sizeof(uint64_t)]; +} padded_pagemap_; + +TEST(TestMemoryFootprint, Test) { + uint64_t pagesize = sysconf(_SC_PAGESIZE); + ASSERT_NE(pagesize, 0); + size_t pages = sizeof(tcmalloc::PageMap) / pagesize + 1; + std::vector present(pages); + + // mincore needs the address rounded to the start page + uint64_t basepage = + reinterpret_cast(&padded_pagemap_.pagemap) & ~(pagesize - 1); + ASSERT_EQ(mincore(reinterpret_cast(basepage), + sizeof(tcmalloc::PageMap), present.data()), + 0); + for (int i = 0; i < pages; i++) { + EXPECT_EQ(present[i], 0); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/parameters.cc b/tcmalloc/parameters.cc new file mode 100644 index 000000000..f78eeb092 --- /dev/null +++ b/tcmalloc/parameters.cc @@ -0,0 +1,191 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/parameters.h" + +#include "tcmalloc/common.h" +#include "tcmalloc/huge_page_aware_allocator.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/thread_cache.h" + +namespace tcmalloc { + +// As decide_subrelease() is determined at runtime, we cannot require constant +// initialization for the atomic. This avoids an initialization order fiasco. 
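+// (This is the usual function-local-static, construct-on-first-use idiom: the
+// atomic below is created, and decide_subrelease() evaluated, the first time
+// hpaa_subrelease_ptr() is called rather than during static initialization.)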
+static std::atomic* hpaa_subrelease_ptr() { + static std::atomic v(decide_subrelease()); + return &v; +} + +uint64_t Parameters::heap_size_hard_limit() { + size_t amount; + bool is_hard; + std::tie(amount, is_hard) = Static::page_allocator()->limit(); + if (!is_hard) { + amount = 0; + } + return amount; +} + +void Parameters::set_heap_size_hard_limit(uint64_t value) { + TCMalloc_Internal_SetHeapSizeHardLimit(value); +} + +bool Parameters::hpaa_subrelease() { + return hpaa_subrelease_ptr()->load(std::memory_order_relaxed); +} + +void Parameters::set_hpaa_subrelease(bool value) { + TCMalloc_Internal_SetHPAASubrelease(value); +} + +ABSL_CONST_INIT std::atomic Parameters::guarded_sampling_rate_( + 50 * kDefaultProfileSamplingRate); +ABSL_CONST_INIT std::atomic Parameters::lazy_per_cpu_caches_enabled_( + false); +ABSL_CONST_INIT std::atomic Parameters::max_per_cpu_cache_size_( + kMaxCpuCacheSize); +ABSL_CONST_INIT std::atomic Parameters::max_total_thread_cache_bytes_( + kDefaultOverallThreadCacheSize); +ABSL_CONST_INIT std::atomic + Parameters::peak_sampling_heap_growth_fraction_(1.25); +ABSL_CONST_INIT std::atomic Parameters::per_cpu_caches_enabled_( +#if defined(TCMALLOC_DEPRECATED_PERTHREAD) + false +#else + true +#endif +); + +ABSL_CONST_INIT std::atomic Parameters::profile_sampling_rate_( + kDefaultProfileSamplingRate +); + +} // namespace tcmalloc + +extern "C" { + +ABSL_ATTRIBUTE_UNUSED int64_t TCMalloc_GetProfileSamplingRate() { + return tcmalloc::Parameters::profile_sampling_rate(); +} + +ABSL_ATTRIBUTE_UNUSED void TCMalloc_SetProfileSamplingRate(int64_t value) { + tcmalloc::Parameters::set_profile_sampling_rate(value); +} + +ABSL_ATTRIBUTE_UNUSED int64_t TCMalloc_GetGuardedSamplingRate() { + return tcmalloc::Parameters::guarded_sampling_rate(); +} + +ABSL_ATTRIBUTE_UNUSED void TCMalloc_SetGuardedSamplingRate(int64_t value) { + tcmalloc::Parameters::set_guarded_sampling_rate(value); +} + +ABSL_ATTRIBUTE_UNUSED int64_t TCMalloc_GetMaxTotalThreadCacheBytes() { + return tcmalloc::Parameters::max_total_thread_cache_bytes(); +} + +ABSL_ATTRIBUTE_UNUSED void TCMalloc_SetMaxTotalThreadCacheBytes(int64_t value) { + tcmalloc::Parameters::set_max_total_thread_cache_bytes(value); +} + +uint64_t TCMalloc_Internal_GetHeapSizeHardLimit() { + return tcmalloc::Parameters::heap_size_hard_limit(); +} + +bool TCMalloc_Internal_GetHPAASubrelease() { + return tcmalloc::Parameters::hpaa_subrelease(); +} + +bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled() { + return tcmalloc::Parameters::lazy_per_cpu_caches(); +} + +double TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction() { + return tcmalloc::Parameters::peak_sampling_heap_growth_fraction(); +} + +bool TCMalloc_Internal_GetPerCpuCachesEnabled() { + return tcmalloc::Parameters::per_cpu_caches(); +} + +void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v) { + tcmalloc::Parameters::guarded_sampling_rate_.store(v, + std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t value) { + // Ensure that page allocator is set up. + tcmalloc::Static::InitIfNecessary(); + + static absl::base_internal::SpinLock update_lock( + absl::base_internal::kLinkerInitialized); + absl::base_internal::SpinLockHolder l(&update_lock); + + size_t limit = std::numeric_limits::max(); + bool active = false; + if (value > 0) { + limit = value; + active = true; + } + + bool currently_hard = tcmalloc::Static::page_allocator()->limit().second; + if (active || currently_hard) { + // Avoid resetting limit when current limit is soft. 
+ tcmalloc::Static::page_allocator()->set_limit(limit, active /* is_hard */); + Log(tcmalloc::kLog, __FILE__, __LINE__, + "[tcmalloc] set page heap hard limit to", limit, "bytes"); + } +} + +void TCMalloc_Internal_SetHPAASubrelease(bool v) { + tcmalloc::hpaa_subrelease_ptr()->store(v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v) { + tcmalloc::Parameters::lazy_per_cpu_caches_enabled_.store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v) { + tcmalloc::Parameters::max_per_cpu_cache_size_.store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v) { + tcmalloc::Parameters::max_total_thread_cache_bytes_.store( + v, std::memory_order_relaxed); + + absl::base_internal::SpinLockHolder l(&tcmalloc::pageheap_lock); + tcmalloc::ThreadCache::set_overall_thread_cache_size(v); +} + +void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(double v) { + tcmalloc::Parameters::peak_sampling_heap_growth_fraction_.store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v) { + tcmalloc::Parameters::per_cpu_caches_enabled_.store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetProfileSamplingRate(int64_t v) { + tcmalloc::Parameters::profile_sampling_rate_.store(v, + std::memory_order_relaxed); +} + +} // extern "C" + + diff --git a/tcmalloc/parameters.h b/tcmalloc/parameters.h new file mode 100644 index 000000000..8a40266c4 --- /dev/null +++ b/tcmalloc/parameters.h @@ -0,0 +1,115 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_PARAMETERS_H_ +#define TCMALLOC_PARAMETERS_H_ + +#include +#include +#include + +#include "absl/base/internal/spinlock.h" +#include "absl/types/optional.h" +#include "tcmalloc/internal/parameter_accessors.h" + +namespace tcmalloc { + +class Parameters { + public: + + static uint64_t heap_size_hard_limit(); + static void set_heap_size_hard_limit(uint64_t value); + + static bool hpaa_subrelease(); + static void set_hpaa_subrelease(bool value); + + static int64_t guarded_sampling_rate() { + return guarded_sampling_rate_.load(std::memory_order_relaxed); + } + + static void set_guarded_sampling_rate(int64_t value) { + TCMalloc_Internal_SetGuardedSamplingRate(value); + } + + static int32_t max_per_cpu_cache_size() { + return max_per_cpu_cache_size_.load(std::memory_order_relaxed); + } + + static void set_max_per_cpu_cache_size(int32_t value) { + TCMalloc_Internal_SetMaxPerCpuCacheSize(value); + } + + static int64_t max_total_thread_cache_bytes() { + return max_total_thread_cache_bytes_.load(std::memory_order_relaxed); + } + + static void set_max_total_thread_cache_bytes(int64_t value) { + TCMalloc_Internal_SetMaxTotalThreadCacheBytes(value); + } + + static double peak_sampling_heap_growth_fraction() { + return peak_sampling_heap_growth_fraction_.load(std::memory_order_relaxed); + } + + static void set_peak_sampling_heap_growth_fraction(double value) { + TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(value); + } + + static bool lazy_per_cpu_caches() { + return lazy_per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + + static void set_lazy_per_cpu_caches(bool value) { + TCMalloc_Internal_SetLazyPerCpuCachesEnabled(value); + } + + static bool per_cpu_caches() { + return per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + + static void set_per_cpu_caches(bool value) { + TCMalloc_Internal_SetPerCpuCachesEnabled(value); + } + + static int64_t profile_sampling_rate() { + return profile_sampling_rate_.load(std::memory_order_relaxed); + } + + static void set_profile_sampling_rate(int64_t value) { + TCMalloc_Internal_SetProfileSamplingRate(value); + } + + private: + friend void ::TCMalloc_Internal_SetGuardedSamplingRate(int64_t v); + friend void ::TCMalloc_Internal_SetHPAASubrelease(bool v); + friend void ::TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v); + friend void ::TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v); + friend void ::TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v); + friend void ::TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(double v); + friend void ::TCMalloc_Internal_SetPerCpuCachesEnabled(bool v); + friend void ::TCMalloc_Internal_SetProfileSamplingRate(int64_t v); + + static std::atomic guarded_sampling_rate_; + static std::atomic hpaa_subrelease_; + static std::atomic lazy_per_cpu_caches_enabled_; + static std::atomic max_per_cpu_cache_size_; + static std::atomic max_total_thread_cache_bytes_; + static std::atomic peak_sampling_heap_growth_fraction_; + static std::atomic per_cpu_caches_enabled_; + static std::atomic profile_sampling_rate_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PARAMETERS_H_ diff --git a/tcmalloc/peak_heap_tracker.cc b/tcmalloc/peak_heap_tracker.cc new file mode 100644 index 000000000..34b313b31 --- /dev/null +++ b/tcmalloc/peak_heap_tracker.cc @@ -0,0 +1,94 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/peak_heap_tracker.h" + +#include + +#include "absl/base/internal/spinlock.h" +#include "absl/memory/memory.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +bool PeakHeapTracker::IsNewPeak() { + return peak_sampled_heap_size_.value() == 0 || + (static_cast(Static::sampled_objects_size_.value()) / + peak_sampled_heap_size_.value() > + Parameters::peak_sampling_heap_growth_fraction()); +} + +void PeakHeapTracker::MaybeSaveSample() { + if (Parameters::peak_sampling_heap_growth_fraction() <= 0 || !IsNewPeak()) { + return; + } + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + + // double-check in case another allocation was sampled (or a sampled + // allocation freed) while we were waiting for the lock + if (!IsNewPeak()) { + return; + } + peak_sampled_heap_size_.LossyAdd(Static::sampled_objects_size_.value() - + peak_sampled_heap_size_.value()); + + StackTrace *t = peak_sampled_span_stacks_, *next = nullptr; + while (t != nullptr) { + next = reinterpret_cast(t->stack[kMaxStackDepth - 1]); + Static::stacktrace_allocator()->Delete(t); + t = next; + } + + next = nullptr; + for (Span* s : Static::sampled_objects_) { + t = Static::stacktrace_allocator()->New(); + if (t == nullptr) { + Log(kLog, __FILE__, __LINE__, "tcmalloc: could not allocate stack trace", + sizeof(*t)); + t = next; + break; + } + + *t = *s->sampled_stack(); + if (t->depth == kMaxStackDepth) { + t->depth = kMaxStackDepth - 1; + } + t->stack[kMaxStackDepth - 1] = reinterpret_cast(next); + next = t; + } + peak_sampled_span_stacks_ = t; +} + +std::unique_ptr PeakHeapTracker::DumpSample() + const { + auto profile = absl::make_unique( + ProfileType::kPeakHeap, Sampler::GetSamplePeriod(), true, true); + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (StackTrace* t = peak_sampled_span_stacks_; t != nullptr; + t = reinterpret_cast( + t->stack[tcmalloc::kMaxStackDepth - 1])) { + profile->AddTrace(1.0, *t); + } + return profile; +} + +} // namespace tcmalloc diff --git a/tcmalloc/peak_heap_tracker.h b/tcmalloc/peak_heap_tracker.h new file mode 100644 index 000000000..6c85d1ede --- /dev/null +++ b/tcmalloc/peak_heap_tracker.h @@ -0,0 +1,64 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_PEAK_HEAP_TRACKER_H_ +#define TCMALLOC_PEAK_HEAP_TRACKER_H_ + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { + +class PeakHeapTracker { + public: + // Constructor should do nothing since we rely on explicit Init() + // call, which may or may not be called before the constructor runs. + PeakHeapTracker() {} + + // Explicit Init is required because constructor for our single static + // instance may not have run by the time it is used + void Init() { + peak_sampled_span_stacks_ = nullptr; + peak_sampled_heap_size_.Clear(); + } + + // Possibly save high-water-mark allocation stack traces for peak-heap + // profile. Should be called immediately after sampling an allocation. If + // the heap has grown by a sufficient amount since the last high-water-mark, + // it will save a copy of the sample profile. + void MaybeSaveSample() LOCKS_EXCLUDED(pageheap_lock); + + // Return the saved high-water-mark heap profile, if any. + std::unique_ptr DumpSample() const + LOCKS_EXCLUDED(pageheap_lock); + + private: + // Linked list of stack traces from sampled allocations saved (from + // sampled_objects_ above) when we allocate memory from the system. The + // linked list pointer is stored in StackTrace::stack[kMaxStackDepth-1]. + StackTrace* peak_sampled_span_stacks_; + + // Sampled heap size last time peak_sampled_span_stacks_ was saved. Only + // written under pageheap_lock; may be read without it. + tcmalloc_internal::StatsCounter peak_sampled_heap_size_; + + bool IsNewPeak(); +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PEAK_HEAP_TRACKER_H_ diff --git a/tcmalloc/percpu_tcmalloc.h b/tcmalloc/percpu_tcmalloc.h new file mode 100644 index 000000000..44d5162ad --- /dev/null +++ b/tcmalloc/percpu_tcmalloc.h @@ -0,0 +1,525 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PERCPU_TCMALLOC_H_ +#define TCMALLOC_PERCPU_TCMALLOC_H_ + +#include + +#include +#include + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/sysinfo.h" +#include "tcmalloc/internal/mincore.h" +#include "tcmalloc/internal/percpu.h" + +namespace tcmalloc { + +struct PerCPUMetadataState { + size_t virtual_size; + size_t resident_size; +}; + +namespace subtle { +namespace percpu { + +// Tcmalloc slab for per-cpu caching mode. +// Conceptually it is equivalent to an array of NumClasses PerCpuSlab's, +// and in fallback implementation it is implemented that way. But optimized +// implementation uses more compact layout and provides faster operations. +// +// Methods of this type must only be used in threads where it is known that the +// percpu primitives are available and percpu::IsFast() has previously returned +// 'true'. 
+//
+// The template parameter Shift indicates the number of bits to shift
+// the CPU id in order to get the location of the per-cpu slab. If this
+// parameter matches PERCPU_TCMALLOC_FIXED_SLAB_SHIFT as set in
+// percpu_internal.h then the assembly language versions of push/pop batch
+// can be used; otherwise batch operations are emulated.
+template <size_t Shift, size_t NumClasses>
+class TcmallocSlab {
+ public:
+  TcmallocSlab() {}
+
+  // Init must be called before any other methods.
+  // <alloc> is a memory allocation callback (e.g. malloc).
+  // <capacity> callback returns max capacity for size class <cl>.
+  // <lazy> indicates that per-CPU slabs should be populated on demand.
+  //
+  // Initial capacity is 0 for all slabs.
+  void Init(void*(alloc)(size_t size), size_t (*capacity)(size_t cl),
+            bool lazy);
+
+  // May only be called if Init(..., lazy = true) was used.
+  void InitCPU(int cpu, size_t (*capacity)(size_t cl));
+
+  // For tests.
+  void Destroy(void(free)(void*));
+
+  // Number of elements in cpu/cl slab.
+  size_t Length(int cpu, size_t cl) const;
+
+  // Number of elements (currently) allowed in cpu/cl slab.
+  size_t Capacity(int cpu, size_t cl) const;
+
+  // If running on cpu, increment the cpu/cl slab's capacity to no greater than
+  // min(capacity+len, max_cap) and return the increment applied. Otherwise
+  // return 0. Note: max_cap must be the same as returned by the capacity
+  // callback passed to Init.
+  size_t Grow(int cpu, size_t cl, size_t len, size_t max_cap);
+
+  // If running on cpu, decrement the cpu/cl slab's capacity to no less than
+  // max(capacity-len, 0) and return the actual decrement applied. Otherwise
+  // return 0.
+  size_t Shrink(int cpu, size_t cl, size_t len);
+
+  // Add an item (which must be non-zero) to the current CPU's slab. Returns
+  // true if the add succeeds. Otherwise invokes <f> and returns false
+  // (assuming that <f> returns a negative value).
+  bool Push(size_t cl, void* item, OverflowHandler f);
+
+  // Remove an item (LIFO) from the current CPU's slab. If the slab is empty,
+  // invokes <f> and returns its result.
+  void* Pop(size_t cl, UnderflowHandler f);
+
+  // Add up to <len> items to the current cpu slab from the array located at
+  // <batch>. Returns the number of items that were added (possibly 0). All
+  // items not added will be returned at the start of <batch>. Items are only
+  // not added if there is no space on the current cpu.
+  // REQUIRES: len > 0.
+  size_t PushBatch(size_t cl, void** batch, size_t len);
+
+  // Pop up to <len> items from the current cpu slab and return them in
+  // <batch>. Returns the number of items actually removed.
+  // REQUIRES: len > 0.
+  size_t PopBatch(size_t cl, void** batch, size_t len);
+
+  // Remove all items (of all classes) from <cpu>'s slab; reset capacity for
+  // all classes to zero. Then, for each sizeclass, invoke
+  //   DrainHandler(drain_ctx, cl, <items from slab>, <previous capacity>);
+  //
+  // It is invalid to concurrently execute Drain() for the same CPU; calling
+  // Push/Pop/Grow/Shrink concurrently (even on the same CPU) is safe.
+  typedef void (*DrainHandler)(void* drain_ctx, size_t cl, void** batch,
+                               size_t n, size_t cap);
+  void Drain(int cpu, void* drain_ctx, DrainHandler f);
+
+  PerCPUMetadataState MetadataMemoryUsage() const;
+
+ private:
+  // Slab header (packed, atomically updated 64-bit).
+  struct Header {
+    // All values are word offsets from per-CPU region start.
+    // The array is [begin, end).
+    uint16_t current;
+    // Copy of end. Updated by Shrink/Grow, but is not overwritten by Drain.
+    uint16_t end_copy;
+    // Lock updates only begin and end with a 32-bit write.
+ uint16_t begin; + uint16_t end; + + // Lock is used by Drain to stop concurrent mutations of the Header. + // Lock sets begin to 0xffff and end to 0, which makes Push and Pop fail + // regardless of current value. + bool IsLocked() const; + void Lock(); + }; + + // We cast Header to std::atomic. + static_assert(sizeof(Header) == sizeof(std::atomic), + "bad Header size"); + + // We use a single continuous region of memory for all slabs on all CPUs. + // This region is split into NumCPUs regions of size kPerCpuMem (256k). + // First NumClasses words of each CPU region are occupied by slab + // headers (Header struct). The remaining memory contain slab arrays. + struct Slabs { + std::atomic header[NumClasses]; + void* mem[((1ul << Shift) - sizeof(header)) / sizeof(void*)]; + }; + static_assert(sizeof(Slabs) == (1ul << Shift), "Slabs has unexpected size"); + + Slabs* slabs_; + + Slabs* CpuMemoryStart(int cpu) const; + std::atomic* GetHeader(int cpu, size_t cl) const; + static Header LoadHeader(std::atomic* hdrp); + static void StoreHeader(std::atomic* hdrp, Header hdr); + static int CompareAndSwapHeader(int cpu, std::atomic* hdrp, + Header old, Header hdr); +}; + +template +inline size_t TcmallocSlab::Length(int cpu, + size_t cl) const { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + return hdr.IsLocked() ? 0 : hdr.current - hdr.begin; +} + +template +inline size_t TcmallocSlab::Capacity(int cpu, + size_t cl) const { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + return hdr.IsLocked() ? 0 : hdr.end - hdr.begin; +} + +template +inline size_t TcmallocSlab::Grow(int cpu, size_t cl, + size_t len, + size_t max_cap) { + std::atomic* hdrp = GetHeader(cpu, cl); + for (;;) { + Header old = LoadHeader(hdrp); + if (old.IsLocked() || old.end - old.begin == max_cap) { + return 0; + } + uint16_t n = std::min(len, max_cap - (old.end - old.begin)); + Header hdr = old; + hdr.end += n; + hdr.end_copy += n; + const int ret = CompareAndSwapHeader(cpu, hdrp, old, hdr); + if (ret == cpu) { + return n; + } else if (ret >= 0) { + return 0; + } + } +} + +template +inline size_t TcmallocSlab::Shrink(int cpu, size_t cl, + size_t len) { + std::atomic* hdrp = GetHeader(cpu, cl); + for (;;) { + Header old = LoadHeader(hdrp); + if (old.IsLocked() || old.current == old.end) { + return 0; + } + uint16_t n = std::min(len, old.end - old.current); + Header hdr = old; + hdr.end -= n; + hdr.end_copy -= n; + const int ret = CompareAndSwapHeader(cpu, hdrp, old, hdr); + if (ret == cpu) { + return n; + } else if (ret >= 0) { + return 0; + } + } +} + +template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab::Push( + size_t cl, void* item, OverflowHandler f) { + ASSERT(item != nullptr); + if (Shift == PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + return TcmallocSlab_Push_FixedShift(slabs_, cl, item, f) >= 0; + } else { + return TcmallocSlab_Push(slabs_, cl, item, Shift, f) >= 0; + } +} + +template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab::Pop( + size_t cl, UnderflowHandler f) { + if (Shift == PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + return TcmallocSlab_Pop_FixedShift(slabs_, cl, f); + } else { + return TcmallocSlab_Pop(slabs_, cl, f, Shift); + } +} + +static inline void* NoopUnderflow(int cpu, size_t cl) { return nullptr; } + +static inline int NoopOverflow(int cpu, size_t cl, void* item) { return -1; } + +template +inline size_t TcmallocSlab::PushBatch(size_t cl, + void** batch, + size_t len) { + ASSERT(len != 0); + if (Shift == PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + return TcmallocSlab_PushBatch_FixedShift(slabs_, cl, 
batch, len); + } else { + size_t n = 0; + // Push items until either all done or a push fails + while (n < len && Push(cl, batch[len - 1 - n], NoopOverflow)) { + n++; + } + return n; + } +} + +template +inline size_t TcmallocSlab::PopBatch(size_t cl, void** batch, + size_t len) { + ASSERT(len != 0); + size_t n = 0; + if (Shift == PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + n = TcmallocSlab_PopBatch_FixedShift(slabs_, cl, batch, len); + // PopBatch is implemented in assembly, msan does not know that the returned + // batch is initialized. + ANNOTATE_MEMORY_IS_INITIALIZED(batch, n * sizeof(batch[0])); + } else { + // Pop items until either all done or a pop fails + while (n < len && (batch[n] = Pop(cl, NoopUnderflow))) { + n++; + } + } + return n; +} + +template +inline typename TcmallocSlab::Slabs* +TcmallocSlab::CpuMemoryStart(int cpu) const { + return &slabs_[cpu]; +} + +template +inline std::atomic* TcmallocSlab::GetHeader( + int cpu, size_t cl) const { + return &CpuMemoryStart(cpu)->header[cl]; +} + +template +inline typename TcmallocSlab::Header +TcmallocSlab::LoadHeader(std::atomic* hdrp) { + uint64_t raw = hdrp->load(std::memory_order_relaxed); + Header hdr; + memcpy(&hdr, &raw, sizeof(hdr)); + return hdr; +} + +template +inline void TcmallocSlab::StoreHeader( + std::atomic* hdrp, Header hdr) { + uint64_t raw; + memcpy(&raw, &hdr, sizeof(raw)); + hdrp->store(raw, std::memory_order_relaxed); +} + +template +inline int TcmallocSlab::CompareAndSwapHeader( + int cpu, std::atomic* hdrp, Header old, Header hdr) { +#if __WORDSIZE == 64 + uint64_t old_raw, new_raw; + memcpy(&old_raw, &old, sizeof(old_raw)); + memcpy(&new_raw, &hdr, sizeof(new_raw)); + return CompareAndSwapUnsafe(cpu, hdrp, static_cast(old_raw), + static_cast(new_raw)); +#else + Log(kCrash, __FILE__, __LINE__, "This architecture is not supported."); +#endif +} + +template +inline bool TcmallocSlab::Header::IsLocked() const { + return begin == 0xffffu; +} + +template +inline void TcmallocSlab::Header::Lock() { + // Write 0xffff to begin and 0 to end. This blocks new Push'es and Pop's. + // Note: we write only 4 bytes. The first 4 bytes are left intact. + // See Drain method for details. tl;dr: C++ does not allow us to legally + // express this without undefined behavior. + std::atomic* p = reinterpret_cast*>(&begin); + Header hdr; + hdr.begin = 0xffffu; + hdr.end = 0; + int32_t raw; + memcpy(&raw, &hdr.begin, sizeof(raw)); + p->store(raw, std::memory_order_relaxed); +} + +template +void TcmallocSlab::Init(void*(alloc)(size_t size), + size_t (*capacity)(size_t cl), + bool lazy) { + size_t mem_size = absl::base_internal::NumCPUs() * (1ul << Shift); + void* backing = alloc(mem_size); + // MSan does not see writes in assembly. 
+ ANNOTATE_MEMORY_IS_INITIALIZED(backing, mem_size); + if (!lazy) { + memset(backing, 0, mem_size); + } + slabs_ = static_cast(backing); + size_t bytes_used = 0; + for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { + bytes_used += sizeof(std::atomic) * NumClasses; + + for (size_t cl = 0; cl < NumClasses; ++cl) { + size_t cap = capacity(cl); + if (!cap) { + continue; + } + + // One extra element for prefetch + bytes_used += (cap + 1) * sizeof(void*); + } + + if (!lazy) { + InitCPU(cpu, capacity); + } + } + // Check for less than 90% usage of the reserved memory + if (bytes_used * 10 < 9 * mem_size) { + Log(kLog, __FILE__, __LINE__, "Bytes used per cpu of available", bytes_used, + mem_size); + } +} + +template +void TcmallocSlab::InitCPU(int cpu, + size_t (*capacity)(size_t cl)) { + void** elems = slabs_[cpu].mem; + for (size_t cl = 0; cl < NumClasses; ++cl) { + size_t cap = capacity(cl); + if (!cap) { + continue; + } + CHECK_CONDITION(static_cast(cap) == cap); + + Header hdr = {}; + // In Pop() we prefetch the item a subsequent Pop() would return; + // this is slow if it's not a valid pointer. To avoid this problem + // when popping the last item, keep one fake item before the actual + // ones (that points, safely, to itself.) + *elems = elems; + elems++; + size_t begin = elems - reinterpret_cast(CpuMemoryStart(cpu)); + hdr.current = begin; + hdr.begin = begin; + hdr.end = begin; + hdr.end_copy = begin; + elems += cap; + CHECK_CONDITION(reinterpret_cast(elems) - + reinterpret_cast(CpuMemoryStart(cpu)) <= + (1 << Shift)); + + StoreHeader(GetHeader(cpu, cl), hdr); + } +} + +template +void TcmallocSlab::Destroy(void(free)(void*)) { + free(slabs_); + slabs_ = nullptr; +} + +template +void TcmallocSlab::Drain(int cpu, void* ctx, + DrainHandler f) { + CHECK_CONDITION(cpu >= 0); + CHECK_CONDITION(cpu < absl::base_internal::NumCPUs()); + + // Push/Pop/Grow/Shrink can be executed concurrently with Drain. + // That's not an expected case, but it must be handled for correctness. + // Push/Pop/Grow/Shrink can only be executed on and use rseq primitives. + // Push only updates current. Pop only updates current and end_copy + // (it mutates only current but uses 4 byte write for performance). + // Grow/Shrink mutate end and end_copy using 64-bit stores. + + // We attempt to stop all concurrent operations by writing 0xffff to begin + // and 0 to end. However, Grow/Shrink can overwrite our write, so we do this + // in a loop until we know that the header is in quiescent state. + + // Phase 1: collect all begin's (these are not mutated by anybody else). + uint16_t begin[NumClasses]; + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + CHECK_CONDITION(!hdr.IsLocked()); + begin[cl] = hdr.begin; + } + + // Phase 2: stop concurrent mutations. + for (bool done = false; !done;) { + for (size_t cl = 0; cl < NumClasses; ++cl) { + // Note: this reinterpret_cast and write in Lock lead to undefined + // behavior, because the actual object type is std::atomic. But + // C++ does not allow to legally express what we need here: atomic writes + // of different sizes. + reinterpret_cast(GetHeader(cpu, cl))->Lock(); + } + FenceCpu(cpu); + done = true; + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + if (!hdr.IsLocked()) { + // Header was overwritten by Grow/Shrink. Retry. + done = false; + break; + } + } + } + + // Phase 3: execute callbacks. 
+ for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + // We overwrote begin and end, instead we use our local copy of begin + // and end_copy. + size_t n = hdr.current - begin[cl]; + size_t cap = hdr.end_copy - begin[cl]; + void** batch = reinterpret_cast(GetHeader(cpu, 0) + begin[cl]); + f(ctx, cl, batch, n, cap); + } + + // Phase 4: reset current to beginning of the region. + // We can't write all 4 fields at once with a single write, because Pop does + // several non-atomic loads of the fields. Consider that a concurrent Pop + // loads old current (still pointing somewhere in the middle of the region); + // then we update all fields with a single write; then Pop loads the updated + // begin which allows it to proceed; then it decrements current below begin. + // + // So we instead first just update current--our locked begin/end guarantee + // no Push/Pop will make progress. Once we Fence below, we know no Push/Pop + // is using the old current, and can safely update begin/end to be an empty + // slab. + for (size_t cl = 0; cl < NumClasses; ++cl) { + std::atomic* hdrp = GetHeader(cpu, cl); + Header hdr = LoadHeader(hdrp); + hdr.current = begin[cl]; + StoreHeader(hdrp, hdr); + } + + // Phase 5: fence and reset the remaining fields to beginning of the region. + // This allows concurrent mutations again. + FenceCpu(cpu); + for (size_t cl = 0; cl < NumClasses; ++cl) { + std::atomic* hdrp = GetHeader(cpu, cl); + Header hdr; + hdr.current = begin[cl]; + hdr.begin = begin[cl]; + hdr.end = begin[cl]; + hdr.end_copy = begin[cl]; + StoreHeader(hdrp, hdr); + } +} + +template +PerCPUMetadataState TcmallocSlab::MetadataMemoryUsage() + const { + PerCPUMetadataState result; + result.virtual_size = absl::base_internal::NumCPUs() * sizeof(*slabs_); + result.resident_size = MInCore::residence(slabs_, result.virtual_size); + return result; +} + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc + +#endif // TCMALLOC_PERCPU_TCMALLOC_H_ diff --git a/tcmalloc/profile_test.cc b/tcmalloc/profile_test.cc new file mode 100644 index 000000000..c45dd3e86 --- /dev/null +++ b/tcmalloc/profile_test.cc @@ -0,0 +1,209 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_map.h" +#include "tcmalloc/internal/declarations.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +TEST(AllocationSampleTest, TokenAbuse) { + auto token = MallocExtension::StartAllocationProfiling(); + ::operator delete(::operator new(512 * 1024 * 1024)); + // Repeated Claims should happily return null. 
+ auto profile = std::move(token).Stop(); + int count = 0; + profile.Iterate([&](const Profile::Sample &) { count++; }); + EXPECT_EQ(count, 1); + + auto profile2 = std::move(token).Stop(); // NOLINT: use-after-move intended + int count2 = 0; + profile2.Iterate([&](const Profile::Sample &) { count2++; }); + EXPECT_EQ(count2, 0); + + // Delete (on the scope ending) without Claim should also be OK. + { MallocExtension::StartAllocationProfiling(); } +} + +TEST(AllocationSampleTest, SampleAccuracy) { + // Disable GWP-ASan, since it allocates different sizes than normal samples. + MallocExtension::SetGuardedSamplingRate(-1); + + // Allocate about 512 MiB each of various sizes. For _some_ but not all + // sizes, delete it as we go--it shouldn't matter for the sample count. + static const size_t kTotalPerSize = 512 * 1024 * 1024; + + // objects we don't delete as we go + void *list = nullptr; + + // (object size, object alignment, keep objects) + struct Requests { + size_t size; + size_t alignment; + bool keep; + }; + std::vector sizes = {{8, 0, false}, + {16, 16, true}, + {1024, 0, false}, + {64 * 1024, 64, false}, + {1024 * 1024, 128, true}}; + std::set sizes_expected; + for (auto s : sizes) { + sizes_expected.insert(s.size); + } + auto token = MallocExtension::StartAllocationProfiling(); + + // We use new/delete to allocate memory, as malloc returns objects aligned to + // std::max_align_t. + for (auto s : sizes) { + for (size_t bytes = 0; bytes < kTotalPerSize; bytes += s.size) { + void *obj; + if (s.alignment > 0) { + obj = operator new(s.size, static_cast(s.alignment)); + } else { + obj = operator new(s.size); + } + if (s.keep) { + tcmalloc::SLL_Push(&list, obj); + } else { + operator delete(obj); + } + } + } + auto profile = std::move(token).Stop(); + + // size -> bytes seen + absl::flat_hash_map m; + + // size -> alignment request + absl::flat_hash_map alignment; + + for (auto s : sizes) { + alignment[s.size] = s.alignment; + } + + profile.Iterate([&](const tcmalloc::Profile::Sample &e) { + // Don't check stack traces until we have evidence that's broken, it's + // tedious and done fairly well elsewhere. + m[e.allocated_size] += e.sum; + EXPECT_EQ(alignment[e.requested_size], e.requested_alignment); + }); + + size_t max_bytes = 0, min_bytes = std::numeric_limits::max(); + EXPECT_EQ(m.size(), sizes_expected.size()); + for (auto seen : m) { + size_t size = seen.first; + EXPECT_TRUE(sizes_expected.find(size) != sizes_expected.end()) << size; + size_t bytes = seen.second; + min_bytes = std::min(min_bytes, bytes); + max_bytes = std::max(max_bytes, bytes); + } + // Hopefully we're in a fairly small range, that contains our actual + // allocation. + // TODO(b/134690164): better statistical tests here. + EXPECT_GE((min_bytes * 3) / 2, max_bytes); + EXPECT_LE((min_bytes * 3) / 4, kTotalPerSize); + EXPECT_LE(kTotalPerSize, (max_bytes * 4) / 3); + // Remove the objects we left alive + while (list != nullptr) { + void *obj = tcmalloc::SLL_Pop(&list); + operator delete(obj); + } +} + +TEST(FragmentationzTest, Accuracy) { + // Disable GWP-ASan, since it allocates different sizes than normal samples. + MallocExtension::SetGuardedSamplingRate(-1); + + // a fairly odd allocation size - will be rounded to 128. This lets + // us find our record in the table. 
+ static const size_t kItemSize = 115; + // allocate about 3.5 GiB: + static const size_t kNumItems = 32 * 1024 * 1024; + + std::vector> keep; + std::vector> drop; + // hint expected sizes: + drop.reserve(kNumItems * 8 / 10); + keep.reserve(kNumItems * 2 / 10); + + // We allocate many items, then free 80% of them "randomly". (To + // decrease noise and speed up, we just keep every 5th one exactly.) + for (int i = 0; i < kNumItems; ++i) { + // Ideally we should use a malloc() here, for consistency; but unique_ptr + // doesn't come with a have a "free()" deleter; use ::operator new insted. + (i % 5 == 0 ? keep : drop) + .push_back(std::unique_ptr( + static_cast(::operator new[](kItemSize)))); + } + drop.resize(0); + + // there are at least 64 items per span here. (8/10)^64 = 6.2e-7 ~= 0 + // probability we actually managed to free a page; every page is fragmented. + // We still have 20% or so of it allocated, so we should see 80% of it + // charged to these allocations as fragmentations. + auto profile = MallocExtension::SnapshotCurrent(ProfileType::kFragmentation); + + // Pull out the fragmentationz entry corresponding to this + size_t requested_size = 0; + size_t allocated_size = 0; + size_t sum = 0; + size_t count = 0; + profile.Iterate([&](const Profile::Sample &e) { + if (e.requested_size != kItemSize) return; + + if (requested_size == 0) { + allocated_size = e.allocated_size; + requested_size = e.requested_size; + } else { + // we will usually have single entry in + // profile, but in builds without optimization + // our fast-path code causes same call-site to + // have two different stack traces. Thus we + // expect and deal with second entry for same + // allocation. + EXPECT_EQ(requested_size, e.requested_size); + EXPECT_EQ(allocated_size, e.allocated_size); + } + sum += e.sum; + count += e.count; + }); + + double frag_bytes = sum; + double real_frag_bytes = + static_cast(allocated_size * kNumItems) * 0.8; + // We should be pretty close with this much data: + // TODO(b/134690164): this is still slightly flaky (<1%) - why? + EXPECT_NEAR(real_frag_bytes, frag_bytes, real_frag_bytes * 0.15) + << " sum = " << sum << " allocated = " << allocated_size + << " requested = " << requested_size << " count = " << count; +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/realloc_unittest.cc b/tcmalloc/realloc_unittest.cc new file mode 100644 index 000000000..c8fd4589b --- /dev/null +++ b/tcmalloc/realloc_unittest.cc @@ -0,0 +1,104 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Test realloc() functionality + +#include +#include +#include +#include +#include + +#include +#include + +#include "benchmark/benchmark.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" + +namespace tcmalloc { +namespace { + +// Fill a buffer of the specified size with a predetermined pattern +void Fill(unsigned char* buffer, int n) { + for (int i = 0; i < n; i++) { + buffer[i] = (i & 0xff); + } +} + +// Check that the specified buffer has the predetermined pattern +// generated by Fill() +void ExpectValid(unsigned char* buffer, int n) { + for (int i = 0; i < n; i++) { + ASSERT_EQ((i & 0xff), buffer[i]); + } +} + +// Return the next interesting size/delta to check. Returns -1 if no more. +int NextSize(int size) { + if (size < 100) { + return size+1; + } else if (size < 100000) { + // Find next power of two + int power = 1; + while (power < size) { + power <<= 1; + } + + // Yield (power-1, power, power+1) + if (size < power-1) { + return power-1; + } else if (size == power-1) { + return power; + } else { + assert(size == power); + return power+1; + } + } else { + return -1; + } +} + +TEST(ReallocTest, TestWithinCache) { + for (int src_size = 0; src_size >= 0; src_size = NextSize(src_size)) { + for (int dst_size = 0; dst_size >= 0; dst_size = NextSize(dst_size)) { + unsigned char* src = static_cast(malloc(src_size)); + Fill(src, src_size); + unsigned char* dst = static_cast(realloc(src, dst_size)); + ExpectValid(dst, std::min(src_size, dst_size)); + Fill(dst, dst_size); + ExpectValid(dst, dst_size); + if (dst != nullptr) free(dst); + } + } +} + +TEST(ReallocTest, AlignedAllocRealloc) { + std::pair sizes[] = {{1024, 2048}, {512, 128}}; + + for (const auto& p : sizes) { + size_t src_size = p.first, dst_size = p.second; + + auto src = static_cast(aligned_alloc(32, src_size)); + Fill(src, src_size); + auto dst = static_cast(realloc(src, dst_size)); + ExpectValid(dst, std::min(src_size, dst_size)); + Fill(dst, dst_size); + ExpectValid(dst, dst_size); + if (dst != nullptr) free(dst); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/runtime_size_classes.cc b/tcmalloc/runtime_size_classes.cc new file mode 100644 index 000000000..3ebc7ddde --- /dev/null +++ b/tcmalloc/runtime_size_classes.cc @@ -0,0 +1,80 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/runtime_size_classes.h" + +#include + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +using tcmalloc::kLog; + +namespace tcmalloc { + +namespace internal { + +int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, + SizeClassInfo* parsed) { + int c = 1; + int t = 0; + memset(parsed, 0, sizeof(parsed[0]) * max_classes); + for (char e : env) { + // TODO(b/120885588): replace with absl::from_chars, once it is fully + // implemented. 
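+    // For example (mirroring runtime_size_classes_test.cc): parsing
+    // "8,1,32;1024,2,16" fills parsed[1] = {8, 1, 32} and
+    // parsed[2] = {1024, 2, 16}, and returns 3.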
+ if ('0' <= e && e <= '9') { + int n = e - '0'; + int v = 10 * parsed[c].Value(t) + n; + if (v > max_size) { + Log(kLog, __FILE__, __LINE__, "size class integer overflow", v, n); + return -3; + } + parsed[c].SetValue(t, v); + } else if (e == ';') { + // next size class + t = 0; + c++; + if (c >= max_classes) { + return c; + } + } else if (e == ',') { + t++; + if (t >= kSizeClassInfoMembers) { + Log(kLog, __FILE__, __LINE__, "size class too many commas", c); + return -1; + } + } else { + Log(kLog, __FILE__, __LINE__, "Delimiter not , or ;", c, e); + return -2; + } + } + // The size class [0, 0, 0] counts as a size class, but is not parsed. + return c + 1; +} + +} // namespace internal + +int ABSL_ATTRIBUTE_NOINLINE MaybeSizeClassesFromEnv(int max_size, + int max_classes, + SizeClassInfo* parsed) { + const char* e = + tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_SIZE_CLASSES"); + if (!e) { + return 0; + } + return internal::ParseSizeClasses(e, max_size, max_classes, parsed); +} + +} // namespace tcmalloc diff --git a/tcmalloc/runtime_size_classes.h b/tcmalloc/runtime_size_classes.h new file mode 100644 index 000000000..5ca740e65 --- /dev/null +++ b/tcmalloc/runtime_size_classes.h @@ -0,0 +1,45 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Run-time specification of Size classes +#ifndef TCMALLOC_RUNTIME_SIZE_CLASSES_H_ +#define TCMALLOC_RUNTIME_SIZE_CLASSES_H_ + +#include "absl/strings/string_view.h" +#include "tcmalloc/size_class_info.h" + +namespace tcmalloc { +namespace internal { + +// Set size classes from a string. +// Format: "size,pages,num_to_move;" +// Example: "8,1,32;16;32;40,1,16;128,2;256;512" +// This function doesn't do validity checking. If a field is missing, its +// value is set to zero. +// The number of size classes parsed is returned. +int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, + SizeClassInfo* parsed); + +} // namespace internal + +// If the environment variable TCMALLOC_SIZE_CLASSES is defined, its value is +// parsed using ParseSizeClasses and ApplySizeClassDefaults into parsed. The +// number of size classes parsed is returned. On error, a negative value is +// returned. +int MaybeSizeClassesFromEnv(int max_size, int max_classes, + SizeClassInfo* parsed); + +} // namespace tcmalloc + +#endif // TCMALLOC_RUNTIME_SIZE_CLASSES_H_ diff --git a/tcmalloc/runtime_size_classes_test.cc b/tcmalloc/runtime_size_classes_test.cc new file mode 100644 index 000000000..c6e12cccd --- /dev/null +++ b/tcmalloc/runtime_size_classes_test.cc @@ -0,0 +1,113 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/runtime_size_classes.h" + +#include + +#include "gtest/gtest.h" + +using tcmalloc::internal::ParseSizeClasses; + +namespace tcmalloc { + +namespace { + +constexpr int kNumClasses = 4; +constexpr int kMaxSize = 1024 * 1024; + +TEST(RuntimeSizeClassesTest, EnvSingleFullClass) { + // Validate simple parsing. + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,1,32", kMaxSize, kNumClasses, parsed), 2); + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 32); + + EXPECT_EQ(parsed[0].size, 0); + EXPECT_EQ(parsed[0].pages, 0); + EXPECT_EQ(parsed[0].num_to_move, 0); +} + +TEST(RuntimeSizeClassesTest, EnvSingleSizeOnlyClass) { + // Validate simple parsing. + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,1,2", kMaxSize, kNumClasses, parsed), 2); + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 2); +} + +TEST(RuntimeSizeClassesTest, EnvTwoFullClasses) { + // Validate two classes + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,1,32;1024,2,16", kMaxSize, kNumClasses, parsed), + 3); + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 32); + + EXPECT_EQ(parsed[2].size, 1024); + EXPECT_EQ(parsed[2].pages, 2); + EXPECT_EQ(parsed[2].num_to_move, 16); +} + +TEST(RuntimeSizeClassesTest, ParseArrayLimit) { + // Validate that the limit on the number of size classes is enforced. 
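+  // With max_classes == 2, parsing stops once the class index reaches 2, so
+  // parsed[2] below must keep its initial value of {7, 7, 7}.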
+ SizeClassInfo parsed[kNumClasses] = { + {0, 0, 0}, + {9, 9, 9}, + {7, 7, 7}, + }; + EXPECT_EQ(ParseSizeClasses("8,1,32;1024,2,16", kMaxSize, 2, parsed), 2); + + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 32); + + EXPECT_EQ(parsed[2].size, 7); + EXPECT_EQ(parsed[2].pages, 7); + EXPECT_EQ(parsed[2].num_to_move, 7); +} + +TEST(RuntimeSizeClassesTest, EnvBadDelimiter) { + // Invalid class sizes should be caught + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8/4,16,3,1", kMaxSize, kNumClasses, parsed), -2); +} + +TEST(RuntimeSizeClassesTest, EnvTooManyCommas) { + // Invalid class sizes should be caught + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,4,16,3", kMaxSize, kNumClasses, parsed), -1); +} + +TEST(RuntimeSizeClassesTest, EnvIntOverflow) { + // Invalid class sizes should be caught + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,4,2147483648", kMaxSize, kNumClasses, parsed), + -3); +} + +TEST(RuntimeSizeClassesTest, EnvVariableExamined) { + SizeClassInfo parsed[kNumClasses]; + setenv("TCMALLOC_SIZE_CLASSES", "256,13,31", 1); + EXPECT_EQ(MaybeSizeClassesFromEnv(kMaxSize, kNumClasses, parsed), 2); + EXPECT_EQ(parsed[1].size, 256); + EXPECT_EQ(parsed[1].pages, 13); + EXPECT_EQ(parsed[1].num_to_move, 31); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/sampler.cc b/tcmalloc/sampler.cc new file mode 100644 index 000000000..2df746551 --- /dev/null +++ b/tcmalloc/sampler.cc @@ -0,0 +1,196 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/sampler.h" + +#include +#include +#include +#include + +#include "tcmalloc/common.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +ssize_t Sampler::GetSamplePeriod() { + return Parameters::profile_sampling_rate(); +} + +// Run this before using your sampler +ABSL_ATTRIBUTE_NOINLINE void Sampler::Init(uint64_t seed) { + ASSERT(seed != 0); + + // do_malloc comes here without having initialized statics, and + // PickNextSamplingPoint uses data initialized in static vars. + Static::InitIfNecessary(); + + // Initialize PRNG + rnd_ = seed; + // Step it forward 20 times for good measure + for (int i = 0; i < 20; i++) { + rnd_ = NextRandom(rnd_); + } + // Initialize counters + true_bytes_until_sample_ = PickNextSamplingPoint(); + if (Static::IsOnFastPath()) { + bytes_until_sample_ = true_bytes_until_sample_; + was_on_fast_path_ = true; + } else { + // Force the next allocation to hit the slow path. + ASSERT(bytes_until_sample_ == 0); + was_on_fast_path_ = false; + } + allocs_until_guarded_sample_ = PickNextGuardedSamplingPoint(); +} + +ssize_t Sampler::PickNextSamplingPoint() { + sample_period_ = GetSamplePeriod(); + if (sample_period_ <= 0) { + // In this case, we don't want to sample ever, and the larger a + // value we put here, the longer until we hit the slow path + // again. 
However, we have to support the flag changing at + // runtime, so pick something reasonably large (to keep overhead + // low) but small enough that we'll eventually start to sample + // again. + return 128 << 20; + } + if (ABSL_PREDICT_FALSE(sample_period_ == 1)) { + // A sample period of 1, generally used only in tests due to its exorbitant + // cost, is a request for *every* allocation to be sampled. + return 1; + } + return GetGeometricVariable(sample_period_); +} + +ssize_t Sampler::PickNextGuardedSamplingPoint() { + double guarded_sample_rate = Parameters::guarded_sampling_rate(); + double profile_sample_rate = Parameters::profile_sampling_rate(); + if (guarded_sample_rate < 0 || profile_sample_rate <= 0) { + // Guarded sampling is disabled but could be turned on at run time. So we + // return a sampling point (default mean=100) in case guarded sampling is + // later enabled. Since the flag is also checked in + // ShouldSampleGuardedAllocation(), guarded sampling is still guaranteed + // not to run until it is enabled. + return GetGeometricVariable(/*mean=*/100); + } + return GetGeometricVariable( + std::ceil(guarded_sample_rate / profile_sample_rate)); +} + +// Generates a geometric variable with the specified mean. +// This is done by generating a random number between 0 and 1 and applying +// the inverse cumulative distribution function for an exponential. +// Specifically: Let m be the inverse of the sample period, then +// the probability distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below +ssize_t Sampler::GetGeometricVariable(ssize_t mean) { + rnd_ = NextRandom(rnd_); + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) + const uint64_t prng_mod_power = 48; // Number of bits in prng + // The uint32_t cast is to prevent a (hard-to-reproduce) NAN + // under piii debug for some binaries. + double q = static_cast(rnd_ >> (prng_mod_power - 26)) + 1.0; + // Put the computed p-value through the CDF of a geometric. + double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean); + + // Very large values of interval overflow ssize_t. If we happen to + // hit such improbable condition, we simply cheat and clamp interval + // to largest supported value. + return static_cast( + std::min(interval, std::numeric_limits::max())); +} + +size_t Sampler::RecordAllocationSlow(size_t k) { + static std::atomic global_randomness; + + if (ABSL_PREDICT_FALSE(!initialized_)) { + initialized_ = true; + uint64_t global_seed = + global_randomness.fetch_add(1, std::memory_order_relaxed); + Init(reinterpret_cast(this) ^ global_seed); + if (static_cast(true_bytes_until_sample_) > k) { + true_bytes_until_sample_ -= k; + if (Static::IsOnFastPath()) { + bytes_until_sample_ -= k; + was_on_fast_path_ = true; + } + return 0; + } + } + + if (ABSL_PREDICT_FALSE(true_bytes_until_sample_ > k)) { + // The last time we picked a sampling point, we were on the slow path. We + // don't want to sample yet since true_bytes_until_sample_ >= k. + true_bytes_until_sample_ -= k; + + if (ABSL_PREDICT_TRUE(Static::IsOnFastPath())) { + // We've moved from the slow path to the fast path since the last sampling + // point was picked. 
+ bytes_until_sample_ = true_bytes_until_sample_; + true_bytes_until_sample_ = 0; + was_on_fast_path_ = true; + } else { + bytes_until_sample_ = 0; + was_on_fast_path_ = false; + } + + return 0; + } + + // Compute sampling weight (i.e. the number of bytes represented by this + // sample in expectation). + // + // Let k be the size of the allocation, p be the sample period + // (sample_period_), and f the number of bytes after which we decided to + // sample (either bytes_until_sample_ or true_bytes_until_sample_). On + // average, if we were to continue taking samples every p bytes, we would take + // (k - f) / p additional samples in this allocation, plus the one we are + // taking now, for 1 + (k - f) / p total samples. Multiplying by p, the mean + // number of bytes between samples, gives us a weight of p + k - f. + // + size_t weight = + sample_period_ + k - + (was_on_fast_path_ ? bytes_until_sample_ : true_bytes_until_sample_); + const auto point = PickNextSamplingPoint(); + if (ABSL_PREDICT_TRUE(Static::IsOnFastPath())) { + bytes_until_sample_ = point; + true_bytes_until_sample_ = 0; + was_on_fast_path_ = true; + } else { + bytes_until_sample_ = 0; + true_bytes_until_sample_ = point; + was_on_fast_path_ = false; + } + return GetSamplePeriod() <= 0 ? 0 : weight; +} + +double AllocatedBytes(const StackTrace& stack, bool unsample) { + if (unsample) { + return stack.weight * stack.allocated_size * 1.0 / + (stack.requested_size + 1); + } else { + return stack.allocated_size; + } +} + +} // namespace tcmalloc diff --git a/tcmalloc/sampler.h b/tcmalloc/sampler.h new file mode 100644 index 000000000..f0ba1ab6d --- /dev/null +++ b/tcmalloc/sampler.h @@ -0,0 +1,294 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_SAMPLER_H_ +#define TCMALLOC_SAMPLER_H_ + +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +//------------------------------------------------------------------- +// Sampler to decide when to create a sample trace for an allocation +// Not thread safe: Each thread should have it's own sampler object. +// Caller must use external synchronization if used +// from multiple threads. 
+//
+// With 512K average sample step (the default):
+//   the probability of sampling a 4K allocation is about 0.00778
+//   the probability of sampling a 1MB allocation is about 0.865
+//   the probability of sampling a 1GB allocation is about 1.00000
+// In general, the probability of sampling an allocation of size X
+// given a flag value of Y (default 1M) is:
+//   1 - e^(-X/Y)
+//
+// With 128K average sample step:
+//   the probability of sampling a 1MB allocation is about 0.99966
+//   the probability of sampling a 1GB allocation is about 1.0
+//   (about 1 - 2**(-26))
+// With 1M average sample step:
+//   the probability of sampling a 4K allocation is about 0.00390
+//   the probability of sampling a 1MB allocation is about 0.632
+//   the probability of sampling a 1GB allocation is about 1.0
+//
+// The sampler works by representing memory as a long stream from
+// which allocations are taken. Some of the bytes in this stream are
+// marked and if an allocation includes a marked byte then it is
+// sampled. Bytes are marked according to a Poisson point process
+// with each byte being marked independently with probability
+// p = 1/profile_sampling_rate. This makes the probability
+// of sampling an allocation of X bytes equal to the CDF of
+// a geometric with mean profile_sampling_rate (i.e. the
+// probability that at least one byte in the range is marked). This
+// is accurately given by the CDF of the corresponding exponential
+// distribution: 1 - e^(-X/profile_sampling_rate)
+// Independence of the byte marking ensures independence of
+// the sampling of each allocation.
+//
+// This scheme is implemented by noting that, starting from any
+// fixed place, the number of bytes until the next marked byte
+// is geometrically distributed. This number is recorded as
+// bytes_until_sample_. Every allocation subtracts from this
+// number until it is less than 0. When this happens the current
+// allocation is sampled.
+//
+// When an allocation occurs, bytes_until_sample_ is reset to
+// a new independently sampled geometric number of bytes. The
+// memoryless property of the point process means that this may
+// be taken as the number of bytes after the end of the current
+// allocation until the next marked byte. This ensures that
+// very large allocations which would intersect many marked bytes
+// only result in a single call to PickNextSamplingPoint.
+//-------------------------------------------------------------------
+
+class SamplerTest;
+
+class Sampler {
+ public:
+  // Record allocation of "k" bytes. If the allocation needs to be sampled,
+  // return its sampling weight (i.e., the expected number of allocations of
+  // this size represented by this sample); otherwise return 0.
+  size_t RecordAllocation(size_t k);
+
+  // Same as above (but faster), except:
+  // a) REQUIRES(k < std::numeric_limits<ssize_t>::max())
+  // b) if this returns false, you must call RecordAllocation
+  //    to confirm if sampling is truly needed.
+  //
+  // The point of this function is to only deal with the common case of no
+  // sampling and let the caller (which is on the malloc fast path)
+  // "escalate" to fuller and slower logic only if necessary.
+  bool TryRecordAllocationFast(size_t k);
+
+  // If the guarded sampling point has been reached, selects a new sampling
+  // point and returns true. Otherwise returns false.
+  bool ShouldSampleGuardedAllocation();
+
+  // Returns the Sampler's cached Static::IsOnFastPath state.
This may differ + // from a fresh computation due to activating per-CPU mode or the + // addition/removal of hooks. + bool IsOnFastPath() const; + void UpdateFastPathState(); + + // Generate a geometric with mean profile_sampling_rate. + // + // Remembers the value of sample_rate for use in reweighing the sample + // later (so that if the flag value changes before the next sample is taken, + // the next sample is still weighed properly). + ssize_t PickNextSamplingPoint(); + + // Generates a geometric with mean guarded_sample_rate. + ssize_t PickNextGuardedSamplingPoint(); + + // Returns the current sample period + static ssize_t GetSamplePeriod(); + + // The following are public for the purposes of testing + static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value + + constexpr Sampler() + : bytes_until_sample_(0), + sample_period_(0), + true_bytes_until_sample_(0), + allocs_until_guarded_sample_(0), + rnd_(0), + initialized_(false), + was_on_fast_path_(false) {} + + private: + // Bytes until we sample next. + // + // More specifically when bytes_until_sample_ is X, we can allocate + // X bytes without triggering sampling; on the (X+1)th allocated + // byte, the containing allocation will be sampled. + // + // Always non-negative with only very brief exceptions (see + // DecrementFast{,Finish}, so casting to size_t is ok. + ssize_t bytes_until_sample_; + + // Saved copy of the sampling period from when we actually set + // (true_)bytes_until_sample_. This allows us to properly calculate the sample + // weight of the first sample after the sampling period is changed. + ssize_t sample_period_; + + // true_bytes_until_sample_ tracks the sampling point when we are on the slow + // path when picking sampling points (!Static::IsOnFastPath()) up until we + // notice (due to another allocation) that this state has changed. + ssize_t true_bytes_until_sample_; + + // Number of sampled allocations until we do a guarded allocation. + ssize_t allocs_until_guarded_sample_; + + uint64_t rnd_; // Cheap random number generator + bool initialized_; + bool was_on_fast_path_; + + private: + friend class SamplerTest; + // Initialize this sampler. + void Init(uint64_t seed); + size_t RecordAllocationSlow(size_t k); + ssize_t GetGeometricVariable(ssize_t mean); +}; + +inline size_t Sampler::RecordAllocation(size_t k) { + // The first time we enter this function we expect bytes_until_sample_ + // to be zero, and we must call SampleAllocationSlow() to ensure + // proper initialization of static vars. + ASSERT(Static::IsInited() || bytes_until_sample_ == 0); + + // Avoid missampling 0. + k++; + + // Note that we have to deal with arbitrarily large values of k + // here. Thus we're upcasting bytes_until_sample_ to unsigned rather + // than the other way around. And this is why this code cannot be + // merged with DecrementFast code below. + if (static_cast(bytes_until_sample_) <= k) { + size_t result = RecordAllocationSlow(k); + ASSERT(Static::IsInited()); + return result; + } else { + bytes_until_sample_ -= k; + ASSERT(Static::IsInited()); + return 0; + } +} + +inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE +Sampler::TryRecordAllocationFast(size_t k) { + // Avoid missampling 0. Callers pass in requested size (which based on the + // assertion below k>=0 at this point). Since subtracting 0 from + // bytes_until_sample_ is a no-op, we increment k by one and resolve the + // effect on the distribution in Sampler::Unsample. 
+  k++;
+
+  // For efficiency reasons, we're testing bytes_until_sample_ after
+  // decrementing it by k. This allows the compiler to do a sub <reg>, <mem>
+  // followed by a conditional jump on sign. But it is correct only if k
+  // is actually smaller than the largest ssize_t value. Otherwise
+  // converting k to a signed value overflows.
+  //
+  // It would be great for the generated code to be sub <reg>, <mem>
+  // followed by a conditional jump on 'carry', which would work for
+  // arbitrary values of k, but there seems to be no way to express
+  // that in C++.
+  //
+  // Our API contract explicitly states that only small values of k
+  // are permitted. And thus it makes sense to assert on that.
+  ASSERT(static_cast<ssize_t>(k) > 0);
+
+  bytes_until_sample_ -= static_cast<ssize_t>(k);
+  if (ABSL_PREDICT_FALSE(bytes_until_sample_ <= 0)) {
+    // Note: we undo the sampling counter update, since we're not actually
+    // handling the slow path in the "needs sampling" case (calling
+    // RecordAllocationSlow to reset the counter). We do that in order
+    // to avoid non-tail calls in the malloc fast path. See also comments
+    // on the declaration inside the Sampler class.
+    //
+    // volatile is used here to improve the compiler's choice of
+    // instructions. We know that this path is very rare and that there
+    // is no need to keep the previous value of bytes_until_sample_ in a
+    // register. This helps the compiler generate a slightly more efficient
+    // sub <reg>, <mem> instruction for the subtraction above.
+    volatile ssize_t *ptr =
+        const_cast<volatile ssize_t *>(&bytes_until_sample_);
+    *ptr += k;
+    return false;
+  }
+  return true;
+}
+
+inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE
+Sampler::ShouldSampleGuardedAllocation() {
+  if (Parameters::guarded_sampling_rate() < 0) return false;
+  allocs_until_guarded_sample_--;
+  if (ABSL_PREDICT_FALSE(allocs_until_guarded_sample_ < 0)) {
+    allocs_until_guarded_sample_ = PickNextGuardedSamplingPoint();
+    return true;
+  }
+  return false;
+}
+
+// Inline functions which are public for testing purposes
+
+// Returns the next prng value.
+// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
+// This is the lrand64 generator.
+inline uint64_t Sampler::NextRandom(uint64_t rnd) {
+  const uint64_t prng_mult = UINT64_C(0x5DEECE66D);
+  const uint64_t prng_add = 0xB;
+  const uint64_t prng_mod_power = 48;
+  const uint64_t prng_mod_mask =
+      ~((~static_cast<uint64_t>(0)) << prng_mod_power);
+  return (prng_mult * rnd + prng_add) & prng_mod_mask;
+}
+
+inline bool Sampler::IsOnFastPath() const { return was_on_fast_path_; }
+
+inline void Sampler::UpdateFastPathState() {
+  const bool is_on_fast_path = Static::IsOnFastPath();
+  if (ABSL_PREDICT_TRUE(was_on_fast_path_ == is_on_fast_path)) {
+    return;
+  }
+
+  was_on_fast_path_ = is_on_fast_path;
+
+  if (is_on_fast_path) {
+    bytes_until_sample_ = true_bytes_until_sample_;
+    true_bytes_until_sample_ = 0;
+  } else {
+    true_bytes_until_sample_ = bytes_until_sample_;
+    bytes_until_sample_ = 0;
+  }
+}
+
+// If unsample is true, return the approximate number of bytes that would have
+// been allocated to obtain this sample. This is only accurate if the sample
+// period hasn't changed since the allocation(s) were made.
+//
+// If unsample is false, the caller will handle unsampling.
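+//
+// For example (illustrative numbers): a sample recorded for a 4095-byte
+// request placed in a 4096-byte size class with stack.weight == 524288
+// unsamples to 524288 * 4096 / (4095 + 1) = 524288 bytes.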
+double AllocatedBytes(const StackTrace &stack, bool unsample); + +} // namespace tcmalloc + +#endif // TCMALLOC_SAMPLER_H_ diff --git a/tcmalloc/size_class_info.h b/tcmalloc/size_class_info.h new file mode 100644 index 000000000..3cfea9d51 --- /dev/null +++ b/tcmalloc/size_class_info.h @@ -0,0 +1,75 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Specification of Size classes +#ifndef TCMALLOC_size_class_info_H_ +#define TCMALLOC_size_class_info_H_ + +#include + +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +// The number of members in SizeClassInfo +static constexpr int kSizeClassInfoMembers = 3; + +// Precomputed size class parameters. +struct SizeClassInfo { + int Value(int index) const { + switch (index) { + case 0: + return size; + case 1: + return pages; + case 2: + return num_to_move; + } + CHECK_CONDITION(index < kSizeClassInfoMembers); + return 0; + } + + void SetValue(int index, size_t v) { + switch (index) { + case 0: + size = v; + break; + case 1: + pages = v; + break; + case 2: + num_to_move = v; + break; + default: + CHECK_CONDITION(index < kSizeClassInfoMembers); + } + } + + // Max size storable in that class + size_t size; + + // Number of pages to allocate at a time + size_t pages; + + // Number of objects to move between a per-thread list and a central list in + // one shot. We want this to be not too small so we can amortize the lock + // overhead for accessing the central list. Making it too big may temporarily + // cause unnecessary memory wastage in the per-thread free list until the + // scavenger cleans up the list. + size_t num_to_move; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_size_class_info_H_ diff --git a/tcmalloc/size_classes.cc b/tcmalloc/size_classes.cc new file mode 100644 index 000000000..2a775c4a2 --- /dev/null +++ b/tcmalloc/size_classes.cc @@ -0,0 +1,682 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +namespace tcmalloc { + +// is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. 
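+// The fixed-overhead arithmetic above can be reproduced directly (an
+// illustrative sketch; these constants are not part of TCMalloc):
+//
+//   constexpr size_t kPageSize = 8192, kClassSize = 96, kSpanMetadata = 48;
+//   constexpr size_t kLeftOver =
+//       kPageSize - (kPageSize / kClassSize) * kClassSize;  // 32 bytes
+//   constexpr double kFixedOverhead =
+//       static_cast<double>(kLeftOver + kSpanMetadata) /
+//       (kPageSize - kLeftOver);  // (32 + 48) / 8160 ~= 0.98%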
+// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. + +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 24, 1, 32}, // 0.68% + { 32, 1, 32}, // 0.59% + { 40, 1, 32}, // 0.98% + { 48, 1, 32}, // 0.98% + { 56, 1, 32}, // 0.78% + { 64, 1, 32}, // 0.59% + { 72, 1, 32}, // 1.28% + { 80, 1, 32}, // 0.98% + { 88, 1, 32}, // 0.68% + { 96, 1, 32}, // 0.98% + { 104, 1, 32}, // 1.58% + { 112, 1, 32}, // 0.78% + { 120, 1, 32}, // 0.98% + { 128, 1, 32}, // 0.59% + { 136, 1, 32}, // 0.98% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 184, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 280, 1, 32}, // 1.48% + { 296, 1, 32}, // 3.10% + { 312, 1, 32}, // 1.58% + { 336, 1, 32}, // 2.18% + { 368, 1, 32}, // 1.78% + { 408, 1, 32}, // 0.98% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 131072, 16, 2}, // 0.04% + { 139264, 17, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 24, 1, 32}, // 0.17% + { 32, 1, 32}, // 0.15% + { 40, 1, 32}, // 0.17% + { 48, 1, 32}, // 0.24% + { 56, 1, 32}, // 0.17% + { 64, 1, 32}, // 0.15% + { 72, 1, 32}, // 0.17% + { 80, 1, 32}, // 0.29% + { 88, 1, 32}, // 0.24% + { 96, 1, 32}, // 0.24% + { 104, 1, 32}, // 0.17% + { 112, 1, 32}, // 0.34% + { 120, 1, 32}, // 0.17% + { 128, 1, 32}, // 0.15% + { 144, 1, 
32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 184, 1, 32}, // 0.20% + { 200, 1, 32}, // 0.66% + { 224, 1, 32}, // 0.34% + { 256, 1, 32}, // 0.15% + { 280, 1, 32}, // 0.17% + { 304, 1, 32}, // 0.89% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 424, 1, 32}, // 0.51% + { 464, 1, 32}, // 1.03% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 960, 1, 32}, // 0.54% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1664, 1, 32}, // 3.80% + { 1920, 1, 32}, // 0.54% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 14336, 4, 4}, // 1.62% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 27264, 5, 2}, // 0.19% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 49152, 3, 2}, // 0.05% + { 54528, 5, 2}, // 0.19% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 24, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 40, 1, 32}, // 0.03% + { 48, 1, 32}, // 0.02% + { 56, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 72, 1, 32}, // 0.04% + { 80, 1, 32}, // 0.04% + { 88, 1, 32}, // 0.05% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 184, 1, 32}, // 0.07% + { 200, 1, 32}, // 0.07% + { 224, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 288, 1, 32}, // 0.04% + { 312, 1, 32}, // 0.04% + { 344, 1, 32}, // 0.02% + { 416, 1, 32}, // 0.04% + { 472, 1, 32}, // 0.09% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2560, 1, 25}, // 0.41% + { 2944, 1, 22}, // 0.07% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 4096, 1, 16}, // 0.02% + { 4352, 1, 15}, // 0.41% + { 4736, 1, 13}, // 0.66% + { 5120, 1, 12}, // 0.41% + { 5376, 1, 12}, // 1.61% + { 5760, 1, 11}, // 1.15% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 7040, 1, 9}, // 0.66% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 9984, 1, 6}, // 1.00% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 18688, 1, 3}, // 0.21% + { 20096, 1, 3}, // 0.36% + 
{ 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 45568, 2, 2}, // 4.61% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 24, 1, 32}, // 1.57% + { 32, 1, 32}, // 1.17% + { 40, 1, 32}, // 1.57% + { 48, 1, 32}, // 1.57% + { 56, 1, 32}, // 1.37% + { 64, 1, 32}, // 1.17% + { 72, 1, 32}, // 2.78% + { 80, 1, 32}, // 1.57% + { 88, 1, 32}, // 2.37% + { 96, 1, 32}, // 2.78% + { 104, 1, 32}, // 2.17% + { 112, 1, 32}, // 2.78% + { 120, 1, 32}, // 1.57% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 184, 1, 32}, // 2.37% + { 200, 1, 32}, // 3.60% + { 240, 1, 32}, // 1.57% + { 272, 1, 32}, // 1.57% + { 288, 1, 32}, // 2.78% + { 312, 1, 32}, // 2.17% + { 336, 1, 32}, // 2.78% + { 408, 1, 32}, // 1.57% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 4096, 4, 16}, // 0.29% + { 4736, 5, 13}, // 8.36% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 48, 1, 32}, // 0.98% + { 64, 1, 32}, // 0.59% + { 80, 1, 32}, // 0.98% + { 96, 1, 32}, // 0.98% + { 112, 1, 32}, // 0.78% + { 128, 1, 32}, // 0.59% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 304, 1, 32}, // 4.25% + { 320, 1, 32}, // 3.00% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 400, 1, 32}, // 3.00% + { 416, 1, 32}, // 4.25% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 139264, 17, 2}, // 0.03% + { 155648, 19, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 196608, 24, 2}, // 0.02% + { 212992, 26, 2}, // 0.02% + { 229376, 28, 2}, // 0.02% + { 245760, 30, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 48, 1, 32}, // 0.24% + { 64, 1, 32}, // 0.15% + { 80, 1, 32}, // 0.29% + { 96, 1, 32}, // 0.24% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 272, 1, 32}, // 0.54% + { 288, 1, 32}, // 0.84% + { 304, 1, 32}, // 0.89% + { 336, 1, 32}, // 0.69% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 480, 1, 32}, // 0.54% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 960, 1, 32}, // 0.54% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 
32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1664, 1, 32}, // 3.80% + { 1920, 1, 32}, // 0.54% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 14336, 4, 4}, // 1.62% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 26112, 4, 2}, // 0.43% + { 28672, 7, 2}, // 0.02% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 54528, 5, 2}, // 0.19% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 48, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 80, 1, 32}, // 0.04% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 224, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 288, 1, 32}, // 0.04% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 352, 1, 32}, // 0.12% + { 384, 1, 32}, // 0.12% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2560, 1, 25}, // 0.41% + { 2944, 1, 22}, // 0.07% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 4096, 1, 16}, // 0.02% + { 4352, 1, 15}, // 0.41% + { 4736, 1, 13}, // 0.66% + { 5120, 1, 12}, // 0.41% + { 5376, 1, 12}, // 1.61% + { 5760, 1, 11}, // 1.15% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 7040, 1, 9}, // 0.66% + { 7424, 1, 8}, // 0.91% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 9984, 1, 6}, // 1.00% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 13696, 1, 4}, // 0.76% + { 14464, 1, 4}, // 0.71% + { 15360, 1, 4}, // 0.41% + { 16384, 1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 18688, 1, 3}, // 0.21% + { 20096, 1, 3}, // 0.36% + { 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 45568, 2, 2}, // 4.61% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 
0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +const SizeClassInfo SizeMap::kSizeClasses[] = { + // , , + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 48, 1, 32}, // 1.57% + { 64, 1, 32}, // 1.17% + { 80, 1, 32}, // 1.57% + { 96, 1, 32}, // 2.78% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 224, 1, 32}, // 2.78% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 288, 1, 32}, // 2.78% + { 304, 1, 32}, // 4.86% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 400, 1, 32}, // 3.60% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 3584, 7, 18}, // 0.17% + { 4096, 4, 16}, // 0.29% + { 4736, 5, 13}, // 8.36% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#endif +// clang-format on + +} // namespace tcmalloc diff --git a/tcmalloc/size_classes_test.cc b/tcmalloc/size_classes_test.cc new file mode 100644 index 000000000..38f85c358 --- /dev/null +++ b/tcmalloc/size_classes_test.cc @@ -0,0 +1,237 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "gtest/gtest.h" +#include "tcmalloc/common.h" +#include "tcmalloc/size_class_info.h" +#include "tcmalloc/span.h" + +namespace tcmalloc { + +namespace { + +size_t Alignment(size_t size) { + size_t ret = kAlignment; + if (size >= 1024) { + // SizeMap::ClassIndexMaybe requires 128-byte alignment for sizes >=1024. + ret = 128; + } else if (size >= 512) { + // Per //tcmalloc/span.h, we have 64 byte alignment for sizes + // >=512. + ret = 64; + } else if (size >= 8) { + ret = 8; + } + + return ret; +} + +class SizeClassesTest : public ::testing::Test { + protected: + SizeClassesTest() { m_.Init(); } + + SizeMap m_; +}; + +TEST_F(SizeClassesTest, SmallClassesSinglePage) { + // Per //tcmalloc/span.h, the compressed index implementation + // added by cl/126729493 requires small size classes to be placed on a single + // page span so they can be addressed. 
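+  // (Span::PtrToIdx recovers an object's index from the low kPageShift bits
+  // of its address for sizes below SizeMap::kMultiPageSize, which is only
+  // valid when the span covers a single page.)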
+ for (int c = 1; c < kNumClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class >= SizeMap::kMultiPageSize) { + continue; + } + EXPECT_EQ(m_.class_to_pages(c), 1) << max_size_in_class; + } +} + +TEST_F(SizeClassesTest, Aligned) { + // Validate that each size class is properly aligned. + for (int c = 1; c < kNumClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + size_t alignment = Alignment(max_size_in_class); + + EXPECT_EQ(0, max_size_in_class % alignment) << max_size_in_class; + } +} + +TEST_F(SizeClassesTest, Distinguishable) { + // Validate that the size to class lookup table is able to distinguish each + // size class from one another. + // + // ClassIndexMaybe provides 8 byte granularity below 1024 bytes and 128 byte + // granularity for larger sizes, so our chosen size classes cannot be any + // finer (otherwise they would map to the same entry in the lookup table). + for (int c = 1; c < kNumClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + const int class_index = m_.SizeClass(max_size_in_class); + + EXPECT_EQ(c, class_index) << max_size_in_class; + } +} + +// This test is disabled until we use a different span size allocation +// algorithm (such as the one in effect from cl/130150125 until cl/139955211). +TEST_F(SizeClassesTest, DISABLED_WastedSpan) { + // Validate that each size class does not waste (number of objects) * + // (alignment) at the end of the span. + for (int c = 1; c < kNumClasses; c++) { + const size_t span_size = kPageSize * m_.class_to_pages(c); + const size_t max_size_in_class = m_.class_to_size(c); + const size_t alignment = Alignment(max_size_in_class); + const size_t n_objects = span_size / max_size_in_class; + const size_t waste = span_size - n_objects * max_size_in_class; + + EXPECT_LT(waste, n_objects * alignment) << max_size_in_class; + } +} + +TEST_F(SizeClassesTest, DoubleCheckedConsistency) { + // Validate that every size on [0, kMaxSize] maps to a size class that is + // neither too big nor too small. 
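+  // ("Not too small" means class_to_size(sc) >= size; "not too big" means the
+  // next smaller class could not have held the request, i.e.
+  // class_to_size(sc - 1) < size.)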
+ for (size_t size = 0; size <= kMaxSize; size++) { + const int sc = m_.SizeClass(size); + EXPECT_GT(sc, 0) << size; + EXPECT_LT(sc, kNumClasses) << size; + + if (sc > 1) { + EXPECT_GT(size, m_.class_to_size(sc - 1)) + << "Allocating unnecessarily large class"; + } + + const size_t s = m_.class_to_size(sc); + EXPECT_LE(size, s); + EXPECT_NE(s, 0) << size; + } +} + +class TestingSizeMap : public SizeMap { + public: + TestingSizeMap() {} + + bool ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) { + return SizeMap::ValidSizeClasses(num_classes, parsed); + } + + const SizeClassInfo* DefaultSizeClasses() const { return kSizeClasses; } +}; + +class RunTimeSizeClassesTest : public ::testing::Test { + protected: + RunTimeSizeClassesTest() {} + + TestingSizeMap m_; +}; + +TEST_F(RunTimeSizeClassesTest, ValidateClassSizeIncreases) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {16, 1, 14}, + {32, 1, 15}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(4, parsed)); + + parsed[2].size = 8; // Change 32 to 8 + EXPECT_FALSE(m_.ValidSizeClasses(4, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateClassSizeMax) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {kMaxSize - 128, 1, 15}, + }; + // Last class must cover kMaxSize + EXPECT_FALSE(m_.ValidSizeClasses(2, parsed)); + + // Check Max Size is allowed 256 KiB = 262144 + parsed[1].size = kMaxSize; + EXPECT_TRUE(m_.ValidSizeClasses(2, parsed)); + // But kMaxSize + 128 is not allowed + parsed[1].size = kMaxSize + 128; + EXPECT_FALSE(m_.ValidSizeClasses(2, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateClassSizesAlignment) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {8, 1, 14}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + // Doesn't meet alignment requirements + parsed[1].size = 7; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); + + // Over 512, expect alignment of 64 bytes. + // 512 + 64 = 576 + parsed[1].size = 576; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + // 512 + 8 + parsed[1].size = 520; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); + + // Over 1024, expect alignment of 128 bytes. + // 1024 + 128 = 1152 + parsed[1].size = 1024 + 128; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + // 1024 + 64 = 1088 + parsed[1].size = 1024 + 64; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateBatchSize) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {8, 1, kMaxObjectsToMove}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + + ++parsed[1].num_to_move; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidatePageSize) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {1024, 255, kMaxObjectsToMove}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + + parsed[1].pages = 256; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateDefaultSizeClasses) { + // The default size classes also need to be valid. + EXPECT_TRUE(m_.ValidSizeClasses(kNumClasses, m_.DefaultSizeClasses())); +} + +TEST_F(RunTimeSizeClassesTest, EnvVariableNotExamined) { + // Set a valid runtime size class environment variable + setenv("TCMALLOC_SIZE_CLASSES", "256,1,1", 1); + m_.Init(); + // Without runtime_size_classes library linked, the environment variable + // should have no affect. 
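+  // (If "256,1,1" were parsed, size class 1 would become 256 bytes, which the
+  // EXPECT_NE below rules out.)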
+ EXPECT_NE(m_.class_to_size(1), 256); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/size_classes_with_runtime_size_classes_test.cc b/tcmalloc/size_classes_with_runtime_size_classes_test.cc new file mode 100644 index 000000000..0f5edb0b4 --- /dev/null +++ b/tcmalloc/size_classes_with_runtime_size_classes_test.cc @@ -0,0 +1,115 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "tcmalloc/common.h" +#include "tcmalloc/size_class_info.h" +#include "tcmalloc/span.h" + +namespace tcmalloc { + +namespace { + +class TestingSizeMap : public SizeMap { + public: + TestingSizeMap() {} + + const SizeClassInfo* DefaultSizeClasses() const { return kSizeClasses; } +}; + +class RunTimeSizeClassesTest : public ::testing::Test { + protected: + RunTimeSizeClassesTest() {} + + TestingSizeMap m_; +}; + +// Convert size classes into a string that can be passed to ParseSizeClasses(). +std::string SizeClassesToString(int num_classes, const SizeClassInfo* parsed) { + std::string result; + for (int c = 1; c < num_classes; c++) { + std::string one_size = absl::StrFormat( + "%d,%d,%d", parsed[c].size, parsed[c].pages, parsed[c].num_to_move); + if (c == 1) { + result = one_size; + } else { + absl::StrAppend(&result, ";", one_size); + } + } + return result; +} + +std::string ModifiedSizeClassesString(int num_classes, + const SizeClassInfo* source) { + // Set a valid runtime size class environment variable, which + // is a modified version of the default class sizes. + SizeClassInfo parsed[kNumClasses]; + for (int c = 0; c < kNumClasses; c++) { + parsed[c] = source[c]; + } + // Change num_to_move to a different valid value so that + // loading from the ENV can be detected. + EXPECT_NE(parsed[1].num_to_move, 3); + parsed[1].num_to_move = 3; + return SizeClassesToString(num_classes, parsed); +} + +TEST_F(RunTimeSizeClassesTest, EnvVariableExamined) { + std::string e = + ModifiedSizeClassesString(kNumClasses, m_.DefaultSizeClasses()); + setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1); + m_.Init(); + + // Confirm that the expected change is seen. + EXPECT_EQ(m_.num_objects_to_move(1), 3); +} + +// TODO(b/122839049) - Remove this test after bug is fixed. +TEST_F(RunTimeSizeClassesTest, ReducingSizeClassCountNotAllowed) { + // Try reducing the mumber of size classes by 1, which is expected to fail. + std::string e = + ModifiedSizeClassesString(kNumClasses - 1, m_.DefaultSizeClasses()); + setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1); + m_.Init(); + + // Confirm that the expected change is not seen. + EXPECT_EQ(m_.num_objects_to_move(1), m_.DefaultSizeClasses()[1].num_to_move); +} + +// Convert the static classes to a string, parse that string via +// the environement variable and check that we get exactly the same +// results. Note, if the environement variable was not read, this test +// would still pass. 
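+// (SizeClassesToString emits one "size,pages,num_to_move" entry per class,
+// joined by ';', e.g. "8,1,32;16,1,32;..." for the default classes.)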
+TEST_F(RunTimeSizeClassesTest, EnvRealClasses) { + std::string e = SizeClassesToString(kNumClasses, m_.DefaultSizeClasses()); + setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1); + m_.Init(); + // With the runtime_size_classes library linked, the environment variable + // will be parsed. + + for (int c = 0; c < kNumClasses; c++) { + EXPECT_EQ(m_.class_to_size(c), m_.DefaultSizeClasses()[c].size); + EXPECT_EQ(m_.class_to_pages(c), m_.DefaultSizeClasses()[c].pages); + EXPECT_EQ(m_.num_objects_to_move(c), + m_.DefaultSizeClasses()[c].num_to_move); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/span.cc b/tcmalloc/span.cc new file mode 100644 index 000000000..d2e7a7a5b --- /dev/null +++ b/tcmalloc/span.cc @@ -0,0 +1,270 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/span.h" + +#include + +#include + +#include "tcmalloc/common.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +void Span::Sample(StackTrace* stack) { + ASSERT(!sampled_ && stack); + sampled_ = 1; + sampled_stack_ = stack; + Static::sampled_objects_.prepend(this); + // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. + // The cast to value matches Unsample. + Static::sampled_objects_size_.LossyAdd( + static_cast( + AllocatedBytes(*stack, true))); +} + +StackTrace* Span::Unsample() { + if (!sampled_) { + return nullptr; + } + sampled_ = 0; + StackTrace* stack = sampled_stack_; + sampled_stack_ = nullptr; + RemoveFromList(); // from Static::sampled_objects_ + // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. + // The cast to Value ensures no funny business happens during the negation if + // sizeof(size_t) != sizeof(Value). + Static::sampled_objects_size_.LossyAdd( + -static_cast( + AllocatedBytes(*stack, true))); + return stack; +} + +double Span::Fragmentation() const { + const size_t cl = Static::pagemap()->sizeclass(first_page_); + if (cl == 0) { + // Avoid crashes in production mode code, but report in tests. + ASSERT(cl != 0); + return 0; + } + const size_t obj_size = Static::sizemap()->class_to_size(cl); + const size_t span_objects = bytes_in_span() / obj_size; + const size_t live = allocated_; + if (live == 0) { + // Avoid crashes in production mode code, but report in tests. + ASSERT(live != 0); + return 0; + } + // Assume that all in-use objects in this span are spread evenly + // through this span. So charge the free space in span evenly + // to each of the live objects. + // A note on units here: StackTraceTable::AddTrace(1, *t) + // represents usage (of whatever kind: heap space, allocation, + // fragmentation) of 1 object of size t->allocated_size. + // So we want to report here the number of objects we are "responsible" + // for pinning - NOT bytes. 
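+  // (For example, a span holding 85 objects of which 5 are live reports
+  // (85 - 5) / 5 = 16, i.e. each live object is charged 16 free objects'
+  // worth of space.)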
+ return static_cast(span_objects - live) / live; +} + +void Span::AverageFreelistAddedTime(const Span* other) { + // Do this computation as floating-point to avoid overflowing our uint64_t. + freelist_added_time_ = static_cast( + (static_cast(freelist_added_time_) * num_pages_ + + static_cast(other->freelist_added_time_) * other->num_pages_) / + (num_pages_ + other->num_pages_)); +} + +// Freelist organization. +// +// Partially full spans in CentralFreeList contain a list of free objects +// (freelist). We could use the free objects as linked list nodes and form +// a stack, but since the free objects are not likely to be cache-hot the +// chain of dependent misses is very cache-unfriendly. The current +// organization reduces number of cache misses during push/pop. +// +// Objects in the freelist are represented by 2-byte indices. The index is +// object offset from the span start divided by a constant. For small objects +// (<512) divider is 8, for larger -- 64. This allows to fit all indices into +// 2 bytes. +// +// The freelist has two components. First, we have a small array-based cache +// (4 objects) embedded directly into the Span (cache_ and cache_size_). We can +// access this without touching any objects themselves. +// +// The rest of the freelist is stored as arrays inside free objects themselves. +// We can store object_size / 2 indexes in any object, but this is not always +// sufficient to store the entire contents of a Span in a single object. So we +// reserve the first index slot in an object to form a linked list. We use the +// first object in that list (freelist_) as an array to push/pop from; any +// subsequent objects in the list's arrays are guaranteed to be full. +// +// Graphically this can be depicted as follows: +// +// freelist_ embed_count_ cache_ cache_size_ +// Span: [ |idx| 4 |idx|idx|---|---| 2 ] +// | +// \/ +// [idx|idx|idx|idx|idx|---|---|---] 16-byte object +// | +// \/ +// [---|idx|idx|idx|idx|idx|idx|idx] 16-byte object +// + +Span::ObjIdx Span::PtrToIdx(void* ptr, size_t size) const { + // Object index is an offset from span start divided by a power-of-two. + // The divisors are choosen so that + // (1) objects are aligned on the divisor, + // (2) index fits into 16 bits and + // (3) the index of the beginning of all objects is strictly less than + // kListEnd (note that we have 256K pages and multi-page spans). + // For example with 1M spans we need kMultiPageAlignment >= 16. + // An ASSERT in BuildFreelist() verifies a condition which implies (3). + uintptr_t p = reinterpret_cast(ptr); + uintptr_t off; + if (size <= SizeMap::kMultiPageSize) { + // Generally we need to load first_page_ to compute the offset. + // But first_page_ can be in a different cache line then the fields that + // we use in FreelistPush otherwise (cache_, cache_size_, freelist_). + // So we avoid loading first_page_ for smaller sizes that have one page per + // span, instead we compute the offset by taking low kPageShift bits of the + // pointer. + ASSERT(p - first_page_ * kPageSize < kPageSize); + off = (p & (kPageSize - 1)) / kAlignment; + } else { + off = (p - (first_page_ * kPageSize)) / SizeMap::kMultiPageAlignment; + } + ObjIdx idx = static_cast(off); + ASSERT(idx != kListEnd); + ASSERT(idx == off); + return idx; +} + +Span::ObjIdx* Span::IdxToPtr(ObjIdx idx, size_t size) const { + ASSERT(idx != kListEnd); + uintptr_t off = + first_page_ * kPageSize + (static_cast(idx) + << (size <= SizeMap::kMultiPageSize + ? 
kAlignmentShift + : SizeMap::kMultiPageAlignmentShift)); + ObjIdx* ptr = reinterpret_cast(off); + ASSERT(PtrToIdx(ptr, size) == idx); + return ptr; +} + +bool Span::FreelistPush(void* ptr, size_t size) { + ASSERT(allocated_ > 0); + if (allocated_ == 1) { + return false; + } + allocated_--; + + ObjIdx idx = PtrToIdx(ptr, size); + if (cache_size_ != kCacheSize) { + // Have empty space in the cache, push there. + cache_[cache_size_] = idx; + cache_size_++; + } else if (freelist_ != kListEnd && + // -1 because the first slot is used by freelist link. + embed_count_ != size / sizeof(ObjIdx) - 1) { + // Push onto the first object on freelist. + ObjIdx* host; + if (size <= SizeMap::kMultiPageSize) { + // Avoid loading first_page_ in this case (see the comment in PtrToIdx). + ASSERT(num_pages_ == 1); + host = reinterpret_cast( + (reinterpret_cast(ptr) & ~(kPageSize - 1)) + + static_cast(freelist_) * kAlignment); + ASSERT(PtrToIdx(host, size) == freelist_); + } else { + host = IdxToPtr(freelist_, size); + } + embed_count_++; + host[embed_count_] = idx; + } else { + // Push onto freelist. + *reinterpret_cast(ptr) = freelist_; + freelist_ = idx; + embed_count_ = 0; + } + return true; +} + +size_t Span::FreelistPopBatch(void** __restrict batch, size_t N, size_t size) { + if (size <= SizeMap::kMultiPageSize) { + return FreelistPopBatchSized(batch, N, size); + } else { + return FreelistPopBatchSized(batch, N, size); + } +} + +void Span::BuildFreelist(size_t size, size_t count) { + allocated_ = 0; + cache_size_ = 0; + embed_count_ = 0; + freelist_ = kListEnd; + + ObjIdx idx = 0; + ObjIdx idxStep = size / kAlignment; + // Valid objects are {0, idxStep, idxStep * 2, ..., idxStep * (count - 1)}. + if (size > SizeMap::kMultiPageSize) { + idxStep = size / SizeMap::kMultiPageAlignment; + } + + // Verify that the end of the useful portion of the span (and the beginning of + // the span waste) has an index that doesn't overflow or risk confusion with + // kListEnd. This is slightly stronger than we actually need (see comment in + // PtrToIdx for that) but rules out some bugs and weakening it wouldn't + // actually help. One example of the potential bugs that are ruled out is the + // possibility of idxEnd (below) overflowing. + ASSERT(count * idxStep < kListEnd); + + // The index of the end of the useful portion of the span. + ObjIdx idxEnd = count * idxStep; + // First, push as much as we can into the cache_. + for (; idx < idxEnd && cache_size_ < kCacheSize; idx += idxStep) { + cache_[cache_size_] = idx; + cache_size_++; + } + // Now, build freelist and stack other objects onto freelist objects. + // Note: we take freelist objects from the beginning and stacked objects + // from the end. This has a nice property of not paging in whole span at once + // and not draining whole cache. + ObjIdx* host = nullptr; // cached first object on freelist + const size_t max_embed = size / sizeof(ObjIdx) - 1; + while (idx < idxEnd) { + // Check the no idx can be confused with kListEnd. + ASSERT(idx != kListEnd); + if (host && embed_count_ != max_embed) { + // Push onto first object on the freelist. + embed_count_++; + idxEnd -= idxStep; + host[embed_count_] = idxEnd; + } else { + // The first object is full, push new object onto freelist. 
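+      // (Slot 0 of the new object links to the previous freelist head, the
+      // object becomes the new head, and its remaining slots are then
+      // available for embedded indices.)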
+ host = IdxToPtr(idx, size); + host[0] = freelist_; + freelist_ = idx; + embed_count_ = 0; + idx += idxStep; + } + } +} + +} // namespace tcmalloc diff --git a/tcmalloc/span.h b/tcmalloc/span.h new file mode 100644 index 000000000..7ed569f29 --- /dev/null +++ b/tcmalloc/span.h @@ -0,0 +1,347 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A Span is a contiguous run of pages. + +#ifndef TCMALLOC_SPAN_H_ +#define TCMALLOC_SPAN_H_ + +#include +#include +#include + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +// Information kept for a span (a contiguous run of pages). +// +// Spans can be in different states. The current state determines set of methods +// that can be called on the span (and the active member in the union below). +// States are: +// - SMALL_OBJECT: the span holds multiple small objects. +// The span is owned by CentralFreeList and is generally on +// CentralFreeList::nonempty_ list (unless has no free objects). +// location_ == IN_USE. +// - LARGE_OBJECT: the span holds a single large object. +// The span can be considered to be owner by user until the object is freed. +// location_ == IN_USE. +// - SAMPLED: the span holds a single sampled object. +// The span can be considered to be owner by user until the object is freed. +// location_ == IN_USE && sampled_ == 1. +// - ON_NORMAL_FREELIST: the span has no allocated objects, owned by PageHeap +// and is on normal PageHeap list. +// location_ == ON_NORMAL_FREELIST. +// - ON_RETURNED_FREELIST: the span has no allocated objects, owned by PageHeap +// and is on returned PageHeap list. +// location_ == ON_RETURNED_FREELIST. +class Span; +typedef TList SpanList; + +class Span : public SpanList::Elem { + public: + // Allocator/deallocator for spans. Note that these functions are defined + // in static_vars.h, which is weird: see there for why. + static Span* New(PageID p, Length len) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + static void Delete(Span* span) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Remove this from the linked list in which it resides. + // REQUIRES: this span is on some list. + void RemoveFromList(); + + // locations used to track what list a span resides on. + enum Location { + IN_USE, // not on PageHeap lists + ON_NORMAL_FREELIST, // on normal PageHeap list + ON_RETURNED_FREELIST, // on returned PageHeap list + }; + Location location() const; + void set_location(Location loc); + + // --------------------------------------------------------------------------- + // Support for sampled allocations. + // There is one-to-one correspondence between a sampled allocation and a span. + // --------------------------------------------------------------------------- + + // Mark this span as sampling allocation at the stack. Sets state to SAMPLED. 
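+  // REQUIRES: this span is not already sampled.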
+ void Sample(StackTrace* stack) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Unmark this span as sampling an allocation. + // Returns stack trace previously passed to Sample, + // or nullptr if this is a non-sampling span. + // REQUIRES: this is a SAMPLED span. + StackTrace* Unsample() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Returns stack for the sampled allocation. + // pageheap_lock is not required, but caller either needs to hold the lock or + // ensure by some other means that the sampling state can't be changed + // concurrently. + // REQUIRES: this is a SAMPLED span. + StackTrace* sampled_stack() const; + + // Is it a sampling span? + // For debug checks. pageheap_lock is not required, but caller needs to ensure + // that sampling state can't be changed concurrently. + bool sampled() const; + + // --------------------------------------------------------------------------- + // Span memory range. + // --------------------------------------------------------------------------- + + // Returns first page of the span. + PageID first_page() const; + + // Returns the last page in the span. + PageID last_page() const; + + // Sets span first page. + void set_first_page(PageID p); + + // Returns start address of the span. + void* start_address() const; + + // Returns number of pages in the span. + Length num_pages() const; + + // Sets number of pages in the span. + void set_num_pages(Length len); + + // Total memory bytes in the span. + size_t bytes_in_span() const; + + // --------------------------------------------------------------------------- + // Age tracking (for free spans in PageHeap). + // --------------------------------------------------------------------------- + + uint64_t freelist_added_time() const; + void set_freelist_added_time(uint64_t t); + + // Sets this span freelist added time to average of this and other times + // weighted by their sizes. + // REQUIRES: this is a ON_NORMAL_FREELIST or ON_RETURNED_FREELIST span. + void AverageFreelistAddedTime(const Span* other); + + // Returns internal fragmentation of the span. + // REQUIRES: this is a SMALL_OBJECT span. + double Fragmentation() const; + + // --------------------------------------------------------------------------- + // Freelist management. + // Used for spans in CentralFreelist to manage free objects. + // These methods REQUIRE a SMALL_OBJECT span. + // --------------------------------------------------------------------------- + + // Span freelist is empty? + bool FreelistEmpty() const; + + // Pushes ptr onto freelist unless the freelist becomes full, + // in which case just return false. + bool FreelistPush(void* ptr, size_t size); + + // Pops up to N objects from the freelist and returns them in the batch array. + // Returns number of objects actually popped. + size_t FreelistPopBatch(void** batch, size_t N, size_t size); + + // Reset a Span object to track the range [p, p + n). + void Init(PageID p, Length n); + + // Initialize freelist to contain all objects in the span. + void BuildFreelist(size_t size, size_t count); + + private: + // See the comment on freelist organization in cc file. + typedef uint16_t ObjIdx; + static const size_t kCacheSize = 4; + static const ObjIdx kListEnd = -1; + + // Use uint16_t or uint8_t for 16 bit and 8 bit fields instead of bitfields. + // LLVM will generate widen load/store and bit masking operations to access + // bitfields and this hurts performance. 
Although compiler flag + // -ffine-grained-bitfield-accesses can help the performance if bitfields + // are used here, but the flag could potentially hurt performance in other + // cases so it is not enabled by default. For more information, please + // look at b/35680381 and cl/199502226. + uint16_t allocated_; // Number of non-free objects + uint16_t embed_count_; + uint16_t freelist_; + uint8_t cache_size_; + uint8_t location_ : 2; // Is the span on a freelist, and if so, which? + uint8_t sampled_ : 1; // Sampled object? + + union { + // Used only for spans in CentralFreeList (SMALL_OBJECT state). + // Embed cache of free objects. + ObjIdx cache_[kCacheSize]; + + // Used only for sampled spans (SAMPLED state). + StackTrace* sampled_stack_; + + // Used only for spans in PageHeap + // (ON_NORMAL_FREELIST or ON_RETURNED_FREELIST state). + // Time when this span was added to a freelist. Units: cycles. When a span + // is merged into this one, we set this to the average of now and the + // current freelist_added_time, weighted by the two spans' sizes. + uint64_t freelist_added_time_; + }; + + PageID first_page_; // Starting page number. + Length num_pages_; // Number of pages in span. + + // Convert object pointer <-> freelist index. + ObjIdx PtrToIdx(void* ptr, size_t size) const; + ObjIdx* IdxToPtr(ObjIdx idx, size_t size) const; + + template + ObjIdx* IdxToPtrSized(ObjIdx idx, size_t size) const; + + template + size_t FreelistPopBatchSized(void** __restrict batch, size_t N, size_t size); + + enum Align { SMALL, LARGE }; +}; + +template +Span::ObjIdx* Span::IdxToPtrSized(ObjIdx idx, size_t size) const { + ASSERT(idx != kListEnd); + ASSERT(align == Align::LARGE || align == Align::SMALL); + uintptr_t off = + first_page_ * kPageSize + + (static_cast(idx) + << (align == Align::SMALL ? kAlignmentShift + : SizeMap::kMultiPageAlignmentShift)); + ObjIdx* ptr = reinterpret_cast(off); + ASSERT(PtrToIdx(ptr, size) == idx); + return ptr; +} + +template +size_t Span::FreelistPopBatchSized(void** __restrict batch, size_t N, + size_t size) { + size_t result = 0; + + // Pop from cache. + auto csize = cache_size_; + auto cache_reads = csize < N ? csize : N; + for (; result < cache_reads; result++) { + batch[result] = IdxToPtrSized(cache_[csize - result - 1], size); + } + + // Store this->cache_size_ one time. + cache_size_ = csize - result; + + while (result < N) { + if (freelist_ == kListEnd) { + break; + } + + ObjIdx* const host = IdxToPtrSized(freelist_, size); + uint16_t embed_count = embed_count_; + ObjIdx current = host[embed_count]; + + size_t iter = embed_count; + if (result + embed_count > N) { + iter = N - result; + } + for (size_t i = 0; i < iter; i++) { + // Pop from the first object on freelist. + batch[result + i] = IdxToPtrSized(host[embed_count - i], size); + } + embed_count -= iter; + result += iter; + + // Update current for next cycle. + current = host[embed_count]; + + if (result == N) { + embed_count_ = embed_count; + break; + } + + // The first object on the freelist is empty, pop it. 
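+    // (The host object itself is handed to the caller; host[0], read into
+    // current above, indexes the next list object, which becomes the new
+    // head. That object is always full, so embed_count_ is reset to its
+    // maximum.)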
+ ASSERT(embed_count == 0); + + batch[result] = host; + result++; + + freelist_ = current; + embed_count_ = size / sizeof(ObjIdx) - 1; + } + allocated_ += result; + return result; +} + +inline Span::Location Span::location() const { + return static_cast(location_); +} + +inline void Span::set_location(Location loc) { + location_ = static_cast(loc); +} + +inline StackTrace* Span::sampled_stack() const { + ASSERT(sampled_); + return sampled_stack_; +} + +inline bool Span::sampled() const { return sampled_; } + +inline PageID Span::first_page() const { return first_page_; } + +inline PageID Span::last_page() const { return first_page_ + num_pages_ - 1; } + +inline void Span::set_first_page(PageID p) { first_page_ = p; } + +inline void* Span::start_address() const { + return reinterpret_cast(first_page_ * kPageSize); +} + +inline Length Span::num_pages() const { return num_pages_; } + +inline void Span::set_num_pages(Length len) { num_pages_ = len; } + +inline size_t Span::bytes_in_span() const { return num_pages_ << kPageShift; } + +inline void Span::set_freelist_added_time(uint64_t t) { + freelist_added_time_ = t; +} + +inline uint64_t Span::freelist_added_time() const { + return freelist_added_time_; +} + +inline bool Span::FreelistEmpty() const { + return cache_size_ == 0 && freelist_ == kListEnd; +} + +inline void Span::RemoveFromList() { SpanList::Elem::remove(); } + +inline void Span::Init(PageID p, Length n) { + first_page_ = p; + num_pages_ = n; + location_ = IN_USE; + sampled_ = 0; +#ifndef NDEBUG + // In debug mode we have additional checking of our list ops; + // these must be initialized. + memset(static_cast(this), 0, sizeof(SpanList::Elem)); +#endif +} + +} // namespace tcmalloc + +#endif // TCMALLOC_SPAN_H_ diff --git a/tcmalloc/span_test.cc b/tcmalloc/span_test.cc new file mode 100644 index 000000000..8ff7a0e42 --- /dev/null +++ b/tcmalloc/span_test.cc @@ -0,0 +1,183 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/span.h" + +#include + +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/container/flat_hash_set.h" +#include "absl/random/random.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +class RawSpan { + public: + void Init(size_t cl) { + size_t size = Static::sizemap()->class_to_size(cl); + size_t npages = Static::sizemap()->class_to_pages(cl); + size_t objects_per_span = npages * kPageSize / size; + + void *mem; + int res = posix_memalign(&mem, kPageSize, npages * kPageSize); + CHECK_CONDITION(res == 0); + span_.set_first_page(reinterpret_cast(mem) / kPageSize); + span_.set_num_pages(npages); + span_.BuildFreelist(size, objects_per_span); + } + + ~RawSpan() { free(span_.start_address()); } + + Span &span() { return span_; } + + private: + Span span_; +}; + +class SpanTest : public testing::TestWithParam { + protected: + size_t cl_; + size_t size_; + size_t npages_; + size_t batch_size_; + size_t objects_per_span_; + RawSpan raw_span_; + + private: + void SetUp() override { + cl_ = GetParam(); + size_ = Static::sizemap()->class_to_size(cl_); + npages_ = Static::sizemap()->class_to_pages(cl_); + batch_size_ = Static::sizemap()->num_objects_to_move(cl_); + objects_per_span_ = npages_ * kPageSize / size_; + + raw_span_.Init(cl_); + } + + void TearDown() override {} +}; + +TEST_P(SpanTest, FreelistBasic) { + Span &span_ = raw_span_.span(); + + EXPECT_FALSE(span_.FreelistEmpty()); + void *batch[kMaxObjectsToMove]; + size_t popped = 0; + size_t want = 1; + char *start = static_cast(span_.start_address()); + std::vector objects(objects_per_span_); + for (size_t x = 0; x < 2; ++x) { + // Pop all objects in batches of varying size and ensure that we've got + // all objects. + for (;;) { + size_t n = span_.FreelistPopBatch(batch, want, size_); + popped += n; + EXPECT_EQ(span_.FreelistEmpty(), popped == objects_per_span_); + for (size_t i = 0; i < n; ++i) { + void *p = batch[i]; + uintptr_t off = reinterpret_cast(p) - start; + EXPECT_LT(off, span_.bytes_in_span()); + EXPECT_EQ(off % size_, 0); + size_t idx = off / size_; + EXPECT_FALSE(objects[idx]); + objects[idx] = true; + } + if (n < want) { + break; + } + ++want; + if (want > batch_size_) { + want = 1; + } + } + EXPECT_TRUE(span_.FreelistEmpty()); + EXPECT_EQ(span_.FreelistPopBatch(batch, 1, size_), 0); + EXPECT_EQ(popped, objects_per_span_); + + // Push all objects back except the last one (which would not be pushed). + for (size_t idx = 0; idx < objects_per_span_ - 1; ++idx) { + EXPECT_TRUE(objects[idx]); + bool ok = span_.FreelistPush(start + idx * size_, size_); + EXPECT_TRUE(ok); + EXPECT_FALSE(span_.FreelistEmpty()); + objects[idx] = false; + --popped; + } + // On the last iteration we can actually push the last object. + if (x == 1) { + bool ok = + span_.FreelistPush(start + (objects_per_span_ - 1) * size_, size_); + EXPECT_FALSE(ok); + } + } +} + +TEST_P(SpanTest, FreelistRandomized) { + Span &span_ = raw_span_.span(); + + char *start = static_cast(span_.start_address()); + + // Do a bunch of random pushes/pops with random batch size. 
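+  // The objects set tracks pointers the test currently holds (popped but not
+  // yet pushed back): every pop inserts into it and every successful push
+  // erases from it.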
+ absl::BitGen rng; + absl::flat_hash_set objects; + void *batch[kMaxObjectsToMove]; + for (size_t x = 0; x < 10000; ++x) { + if (!objects.empty() && absl::Bernoulli(rng, 1.0 / 2)) { + void *p = *objects.begin(); + if (span_.FreelistPush(p, size_)) { + objects.erase(objects.begin()); + } else { + EXPECT_EQ(objects.size(), 1); + } + EXPECT_EQ(span_.FreelistEmpty(), objects_per_span_ == 1); + } else { + size_t want = absl::Uniform(rng, 0, batch_size_) + 1; + size_t n = span_.FreelistPopBatch(batch, want, size_); + if (n < want) { + EXPECT_TRUE(span_.FreelistEmpty()); + } + for (size_t i = 0; i < n; ++i) { + EXPECT_TRUE(objects.insert(batch[i]).second); + } + } + } + // Now pop everything what's there. + for (;;) { + size_t n = span_.FreelistPopBatch(batch, batch_size_, size_); + for (size_t i = 0; i < n; ++i) { + EXPECT_TRUE(objects.insert(batch[i]).second); + } + if (n < batch_size_) { + break; + } + } + // Check that we have collected all objects. + EXPECT_EQ(objects.size(), objects_per_span_); + for (void *p : objects) { + uintptr_t off = reinterpret_cast(p) - start; + EXPECT_LT(off, span_.bytes_in_span()); + EXPECT_EQ(off % size_, 0); + } +} + +INSTANTIATE_TEST_SUITE_P(All, SpanTest, testing::Range(size_t(1), kNumClasses)); + +} // namespace tcmalloc diff --git a/tcmalloc/stack_trace_table.cc b/tcmalloc/stack_trace_table.cc new file mode 100644 index 000000000..2d22a5ac0 --- /dev/null +++ b/tcmalloc/stack_trace_table.cc @@ -0,0 +1,153 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/stack_trace_table.h" + +#include +#include + +#include "absl/base/internal/spinlock.h" +#include "absl/hash/hash.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { + +bool StackTraceTable::Bucket::KeyEqual(uintptr_t h, const StackTrace& t) const { + // Do not merge entries with different sizes so that profiling tools + // can allow size-based analysis of the resulting profiles. Note + // that sizes being supplied here are already quantized (to either + // the size-class size for small objects, or a multiple of pages for + // big objects). So the number of distinct buckets kept per stack + // trace should be fairly small. + if (this->hash != h || this->trace.depth != t.depth || + this->trace.requested_size != t.requested_size || + this->trace.requested_alignment != t.requested_alignment || + // These could theoretically differ due to e.g. memalign choices. + // Split the buckets just in case that happens (though it should be rare.) + this->trace.allocated_size != t.allocated_size) { + return false; + } + for (int i = 0; i < t.depth; ++i) { + if (this->trace.stack[i] != t.stack[i]) { + return false; + } + } + return true; +} + +StackTraceTable::StackTraceTable(ProfileType type, int64_t period, bool merge, + bool unsample) + : type_(type), + period_(period), + bucket_mask_(merge ? 
(1 << 14) - 1 : 0), + depth_total_(0), + table_(new Bucket*[num_buckets()]()), + bucket_total_(0), + merge_(merge), + error_(false), + unsample_(unsample) { + memset(table_, 0, num_buckets() * sizeof(Bucket*)); +} + +StackTraceTable::~StackTraceTable() { + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (int i = 0; i < num_buckets(); ++i) { + Bucket* b = table_[i]; + while (b != nullptr) { + Bucket* next = b->next; + Static::bucket_allocator()->Delete(b); + b = next; + } + } + } + delete[] table_; +} + +void StackTraceTable::AddTrace(double count, const StackTrace& t) { + if (error_) { + return; + } + + uintptr_t h = absl::Hash()(t); + + const int idx = h & bucket_mask_; + + Bucket* b = merge_ ? table_[idx] : nullptr; + while (b != nullptr && !b->KeyEqual(h, t)) { + b = b->next; + } + if (b != nullptr) { + b->count += count; + b->total_weight += count * t.weight; + b->trace.weight = b->total_weight / b->count + 0.5; + } else { + depth_total_ += t.depth; + bucket_total_++; + b = Static::bucket_allocator()->New(); + if (b == nullptr) { + Log(kLog, __FILE__, __LINE__, "tcmalloc: could not allocate bucket", + sizeof(*b)); + error_ = true; + } else { + b->hash = h; + b->trace = t; + b->count = count; + b->total_weight = t.weight * count; + b->next = table_[idx]; + table_[idx] = b; + } + } +} + +void StackTraceTable::Iterate( + absl::FunctionRef func) const { + if (error_) { + return; + } + + for (int i = 0; i < num_buckets(); ++i) { + Bucket* b = table_[i]; + while (b != nullptr) { + // Report total bytes that are a multiple of the object size. + size_t allocated_size = b->trace.allocated_size; + size_t requested_size = b->trace.requested_size; + + uintptr_t bytes = b->count * AllocatedBytes(b->trace, unsample_) + 0.5; + + Profile::Sample e; + // We want sum to be a multiple of allocated_size; pick the nearest + // multiple rather than always rounding up or down. + e.count = (bytes + allocated_size / 2) / allocated_size; + e.sum = e.count * allocated_size; + e.requested_size = requested_size; + e.requested_alignment = b->trace.requested_alignment; + e.allocated_size = allocated_size; + + e.depth = b->trace.depth; + static_assert(kMaxStackDepth <= Profile::Sample::kMaxStackDepth, + "Profile stack size smaller than internal stack sizes"); + memcpy(e.stack, b->trace.stack, sizeof(e.stack[0]) * e.depth); + func(e); + + b = b->next; + } + } +} + +} // namespace tcmalloc diff --git a/tcmalloc/stack_trace_table.h b/tcmalloc/stack_trace_table.h new file mode 100644 index 000000000..70eae0070 --- /dev/null +++ b/tcmalloc/stack_trace_table.h @@ -0,0 +1,93 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Utility class for coalescing sampled stack traces. Not thread-safe. 
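+// Traces are bucketed by a hash of the stack; when merging is enabled,
+// identical traces with the same requested and allocated sizes accumulate
+// into a single bucket.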
+ +#ifndef TCMALLOC_STACK_TRACE_TABLE_H_ +#define TCMALLOC_STACK_TRACE_TABLE_H_ + +#include + +#include + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { + +class StackTraceTable : public tcmalloc_internal::ProfileBase { + public: + // If merge is true, traces with identical size and stack are merged + // together. Else they are kept distinct. + // If unsample is true, Iterate() will scale counts to report estimates + // of the true total assuming traces were added by the sampler. + // REQUIRES: L < pageheap_lock + StackTraceTable(ProfileType type, int64_t period, bool merge, bool unsample); + + // REQUIRES: L < pageheap_lock + ~StackTraceTable() override; + + // base::Profile methods. + void Iterate( + absl::FunctionRef func) const override; + + int64_t Period() const override { return period_; } + + ProfileType Type() const override { return type_; } + + // Adds stack trace "t" to table with the specified count. + // The count is a floating point value to reduce rounding + // errors when accounting for sampling probabilities. + void AddTrace(double count, const StackTrace& t) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Exposed for PageHeapAllocator + struct Bucket { + // Key + uintptr_t hash; + StackTrace trace; + + // Payload + double count; + size_t total_weight; + Bucket* next; + + bool KeyEqual(uintptr_t h, const StackTrace& t) const; + }; + + // For testing + int depth_total() const { return depth_total_; } + int bucket_total() const { return bucket_total_; } + + private: + static const int kHashTableSize = 1 << 14; // => table_ is 128k + + ProfileType type_; + int64_t period_; + int bucket_mask_; + int depth_total_; + Bucket** table_; + int bucket_total_; + bool merge_; + bool error_; + bool unsample_; + + int num_buckets() const { return bucket_mask_ + 1; } +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_STACK_TRACE_TABLE_H_ diff --git a/tcmalloc/stack_trace_table_test.cc b/tcmalloc/stack_trace_table_test.cc new file mode 100644 index 000000000..f8fa4ba58 --- /dev/null +++ b/tcmalloc/stack_trace_table_test.cc @@ -0,0 +1,294 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/stack_trace_table.h" + +#include + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/debugging/stacktrace.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace { + +// Rather than deal with heap allocating stack/tags, AllocationEntry contains +// them inline. 
+struct AllocationEntry { + int64_t sum; + int count; + size_t requested_size; + size_t requested_alignment; + size_t allocated_size; + int depth; + void* stack[64]; + + friend bool operator==(const AllocationEntry& x, const AllocationEntry& y); + friend bool operator!=(const AllocationEntry& x, const AllocationEntry& y) { + return !(x == y); + } + + friend std::ostream& operator<<(std::ostream& os, const AllocationEntry& e) { + os << "sum = " << e.sum << "; "; + os << "count = " << e.count << "; "; + + std::vector ptrs; + for (int i = 0; i < e.depth; i++) { + ptrs.push_back(absl::StrFormat("%p", e.stack[i])); + } + os << "stack = [" << absl::StrJoin(ptrs, ", ") << "]; "; + + os << "requested_size = " << e.requested_size << "; "; + os << "requested_alignment = " << e.requested_alignment << "; "; + os << "allocated_size = " << e.allocated_size << "; "; + return os; + } +}; + +inline bool operator==(const AllocationEntry& x, const AllocationEntry& y) { + if (x.sum != y.sum) { + return false; + } + + if (x.count != y.count) { + return false; + } + + if (x.depth != y.depth) { + return false; + } + + if (x.depth > 0 && !std::equal(x.stack, x.stack + x.depth, y.stack)) { + return false; + } + + if (x.requested_size != y.requested_size) { + return false; + } + + if (x.requested_alignment != y.requested_alignment) { + return false; + } + + if (x.allocated_size != y.allocated_size) { + return false; + } + + return true; +} + +void CheckTraces(const StackTraceTable& table, + std::initializer_list expected) { + std::vector actual; + + table.Iterate([&](const Profile::Sample& e) { + AllocationEntry tmp; + tmp.sum = e.sum; + tmp.count = e.count; + tmp.depth = e.depth; + ASSERT_LE(tmp.depth, ABSL_ARRAYSIZE(tmp.stack)); + std::copy(e.stack, e.stack + e.depth, tmp.stack); + + tmp.requested_size = e.requested_size; + tmp.requested_alignment = e.requested_alignment; + tmp.allocated_size = e.allocated_size; + + actual.push_back(tmp); + }); + + EXPECT_THAT(actual, testing::UnorderedElementsAreArray(expected)); +} + +void AddTrace(StackTraceTable* table, double count, const StackTrace& t) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + table->AddTrace(count, t); +} + +TEST(StackTraceTableTest, StackTraceTable) { + // If this test is not linked against TCMalloc, the global arena used for + // StackTraceTable's buckets will not be initialized. 
+ Static::InitIfNecessary(); + + // Empty table + { + SCOPED_TRACE("empty"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + EXPECT_EQ(0, table.depth_total()); + EXPECT_EQ(0, table.bucket_total()); + + CheckTraces(table, {}); + } + + StackTrace t1 = {}; + t1.requested_size = static_cast(512); + t1.requested_alignment = static_cast(16); + t1.allocated_size = static_cast(1024); + t1.depth = static_cast(2); + t1.stack[0] = reinterpret_cast(1); + t1.stack[1] = reinterpret_cast(2); + + const AllocationEntry k1 = { + 1024, + 1, + 512, + 16, + 1024, + 2, + {reinterpret_cast(1), reinterpret_cast(2)}, + }; + + StackTrace t2 = {}; + t2.requested_size = static_cast(375); + t2.requested_alignment = static_cast(0); + t2.allocated_size = static_cast(512); + t2.depth = static_cast(2); + t2.stack[0] = reinterpret_cast(2); + t2.stack[1] = reinterpret_cast(1); + + const AllocationEntry k2 = { + 512, + 1, + 375, + 0, + 512, + 2, + {reinterpret_cast(2), reinterpret_cast(1)}, + }; + + // Table w/ just t1 + { + SCOPED_TRACE("t1"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + EXPECT_EQ(2, table.depth_total()); + EXPECT_EQ(1, table.bucket_total()); + + CheckTraces(table, {k1}); + } + + // Table w/ t1, t2 + { + SCOPED_TRACE("t1, t2"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t2); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + CheckTraces(table, {k1, k2}); + } + + // Table w/ 1.6 x t1, 1 x t2. + // Note that t1's 1.6 count will be rounded-up to 2.0. + { + SCOPED_TRACE("1.6 t1, t2"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 0.4, t1); + AddTrace(&table, 1.0, t2); + AddTrace(&table, 1.2, t1); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + + const AllocationEntry scaled_k1 = { + 2048, + 2, + 512, + 16, + 1024, + 2, + {reinterpret_cast(1), reinterpret_cast(2)}, + }; + + CheckTraces(table, {scaled_k1, k2}); + } + + // Same stack as t1, but w/ different size + StackTrace t3 = {}; + t3.requested_size = static_cast(13); + t3.requested_alignment = static_cast(0); + t3.allocated_size = static_cast(17); + t3.depth = static_cast(2); + t3.stack[0] = reinterpret_cast(1); + t3.stack[1] = reinterpret_cast(2); + + const AllocationEntry k3 = { + 17, + 1, + 13, + 0, + 17, + 2, + {reinterpret_cast(1), reinterpret_cast(2)}, + }; + + // Table w/ t1, t3 + { + SCOPED_TRACE("t1, t3"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t3); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + + CheckTraces(table, {k1, k3}); + } + + // Same stack as t1, but w/ different alignment + StackTrace t4; + t4.requested_size = static_cast(512); + t4.requested_alignment = static_cast(32); + t4.allocated_size = static_cast(1024); + t4.depth = static_cast(2); + t4.stack[0] = reinterpret_cast(1); + t4.stack[1] = reinterpret_cast(2); + + const AllocationEntry k4 = { + 1024, + 1, + 512, + 32, + 1024, + 2, + {reinterpret_cast(1), reinterpret_cast(2)}, + }; + + // Table w/ t1, t4 + { + SCOPED_TRACE("t1, t4"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t4); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + + CheckTraces(table, {k1, k4}); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/static_vars.cc 
b/tcmalloc/static_vars.cc new file mode 100644 index 000000000..218880cf0 --- /dev/null +++ b/tcmalloc/static_vars.cc @@ -0,0 +1,116 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/static_vars.h" + +#include + +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/mincore.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/thread_cache.h" +#include "tcmalloc/tracking.h" + +namespace tcmalloc { + +// Cacheline-align our SizeMap and CPUCache. They both have very hot arrays as +// their first member variables, and aligning them reduces the number of cache +// lines these arrays use. +// +// IF YOU ADD TO THIS LIST, ADD TO STATIC_VAR_SIZE TOO! +absl::base_internal::SpinLock pageheap_lock( + absl::base_internal::kLinkerInitialized); +Arena Static::arena_; +SizeMap ABSL_CACHELINE_ALIGNED Static::sizemap_; +TransferCache Static::transfer_cache_[kNumClasses]; +CPUCache ABSL_CACHELINE_ALIGNED Static::cpu_cache_; +PageHeapAllocator Static::span_allocator_; +PageHeapAllocator Static::stacktrace_allocator_; +PageHeapAllocator Static::threadcache_allocator_; +SpanList Static::sampled_objects_; +ABSL_CONST_INIT tcmalloc_internal::StatsCounter Static::sampled_objects_size_; +PeakHeapTracker Static::peak_heap_tracker_; +PageHeapAllocator Static::bucket_allocator_; +ABSL_CONST_INIT std::atomic Static::inited_{false}; +bool Static::cpu_cache_active_; +Static::PageAllocatorStorage Static::page_allocator_; +PageMap Static::pagemap_; +absl::base_internal::SpinLock guarded_page_lock( + absl::base_internal::kLinkerInitialized); +GuardedPageAllocator Static::guardedpage_allocator_; + +size_t Static::metadata_bytes() { + // This is ugly and doesn't nicely account for e.g. alignment losses + // -- I'd like to put all the above in a struct and take that + // struct's size. But we can't due to linking issues. + const size_t static_var_size = + sizeof(pageheap_lock) + sizeof(arena_) + sizeof(sizemap_) + + sizeof(transfer_cache_) + sizeof(cpu_cache_) + sizeof(span_allocator_) + + sizeof(stacktrace_allocator_) + sizeof(threadcache_allocator_) + + sizeof(sampled_objects_) + sizeof(bucket_allocator_) + + sizeof(inited_) + sizeof(cpu_cache_active_) + sizeof(page_allocator_) + + sizeof(pagemap_) + sizeof(sampled_objects_size_) + + sizeof(peak_heap_tracker_) + sizeof(guarded_page_lock) + + sizeof(guardedpage_allocator_); + + const size_t allocated = arena()->bytes_allocated() + + AddressRegionFactory::InternalBytesAllocated(); + return allocated + static_var_size; +} + +size_t Static::pagemap_residence() { + // Determine residence of the root node of the pagemap. 
+ size_t total = MInCore::residence(&pagemap_, sizeof(pagemap_)); + return total; +} + +ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + + // double-checked locking + if (!inited_.load(std::memory_order_acquire)) { + tracking::Init(); + arena_.Init(); + sizemap_.Init(); + span_allocator_.Init(&arena_); + span_allocator_.New(); // Reduce cache conflicts + span_allocator_.New(); // Reduce cache conflicts + stacktrace_allocator_.Init(&arena_); + bucket_allocator_.Init(&arena_); + peak_heap_tracker_.Init(); + // Do a bit of sanitizing: make sure central_cache is aligned properly + CHECK_CONDITION((sizeof(transfer_cache_[0]) % 64) == 0); + for (int i = 0; i < kNumClasses; ++i) { + transfer_cache_[i].Init(i); + } + new (page_allocator_.memory) PageAllocator; + sampled_objects_.Init(); + threadcache_allocator_.Init(&arena_); + cpu_cache_active_ = false; + pagemap_.MapRootWithSmallPages(); + guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128); + inited_.store(true, std::memory_order_release); + } +} + +} // namespace tcmalloc diff --git a/tcmalloc/static_vars.h b/tcmalloc/static_vars.h new file mode 100644 index 000000000..b0e763a8f --- /dev/null +++ b/tcmalloc/static_vars.h @@ -0,0 +1,198 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Static variables shared by multiple classes. + +#ifndef TCMALLOC_STATIC_VARS_H_ +#define TCMALLOC_STATIC_VARS_H_ + +#include +#include +#include + +#include + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/page_allocator.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/peak_heap_tracker.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/transfer_cache.h" + +namespace tcmalloc { + +class CPUCache; +class PageMap; +class ThreadCache; + +class Static { + public: + // True if InitIfNecessary() has run to completion. + static bool IsInited(); + // Must be called before calling any of the accessors below. + // Safe to call multiple times. + static void InitIfNecessary(); + + // Central cache -- an array of free-lists, one per size-class. + // We have a separate lock per free-list to reduce contention. 
+ static TransferCache* transfer_cache() { return transfer_cache_; } + + static SizeMap* sizemap() { return &sizemap_; } + + static CPUCache* cpu_cache() { return &cpu_cache_; } + + static PeakHeapTracker* peak_heap_tracker() { return &peak_heap_tracker_; } + + ////////////////////////////////////////////////////////////////////// + // In addition to the explicit initialization comment, the variables below + // must be protected by pageheap_lock. + + static Arena* arena() { return &arena_; } + + // Page-level allocator. + static PageAllocator* page_allocator() { + return reinterpret_cast(page_allocator_.memory); + } + + static PageMap* pagemap() { return &pagemap_; } + + static GuardedPageAllocator* guardedpage_allocator() { + return &guardedpage_allocator_; + } + + static PageHeapAllocator* span_allocator() { return &span_allocator_; } + + static PageHeapAllocator* stacktrace_allocator() { + return &stacktrace_allocator_; + } + + static PageHeapAllocator* threadcache_allocator() { + return &threadcache_allocator_; + } + + // State kept for sampled allocations (/heapz support). The StatsCounter is + // only written while holding pageheap_lock, so writes can safely use + // LossyAdd and reads do not require locking. + static SpanList sampled_objects_ GUARDED_BY(pageheap_lock); + static tcmalloc_internal::StatsCounter sampled_objects_size_; + static PageHeapAllocator* bucket_allocator() { + return &bucket_allocator_; + } + + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCacheActive() { + return cpu_cache_active_; + } + static void ActivateCPUCache() { cpu_cache_active_ = true; } + + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { + return +#ifndef TCMALLOC_DEPRECATED_PERTHREAD + // When the per-cpu cache is enabled, and the thread's current cpu + // variable is initialized we will try to allocate from the per-cpu + // cache. If something fails, we bail out to the full malloc. + // Checking the current cpu variable here allows us to remove it from + // the fast-path, since we will fall back to the slow path until this + // variable is initialized. + CPUCacheActive() & subtle::percpu::IsFastNoInit(); +#else + !CPUCacheActive(); +#endif + } + + static size_t metadata_bytes() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // The root of the pagemap is potentially a large poorly utilized + // structure, so figure out how much of it is actually resident. + static size_t pagemap_residence(); + + private: +#if defined(__clang__) + __attribute__((preserve_most)) +#endif + static void + SlowInitIfNecessary(); + + // These static variables require explicit initialization. We cannot + // count on their constructors to do any initialization because other + // static variables may try to allocate memory before these variables + // can run their constructors. + + static Arena arena_; + static SizeMap sizemap_; + static TransferCache transfer_cache_[kNumClasses]; + static CPUCache cpu_cache_; + static GuardedPageAllocator guardedpage_allocator_; + static PageHeapAllocator span_allocator_; + static PageHeapAllocator stacktrace_allocator_; + static PageHeapAllocator threadcache_allocator_; + static PageHeapAllocator bucket_allocator_; + static std::atomic inited_; + static bool cpu_cache_active_; + static PeakHeapTracker peak_heap_tracker_; + + // PageHeap uses a constructor for initialization. Like the members above, + // we can't depend on initialization order, so pageheap is new'd + // into this buffer. 
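  // (SlowInitIfNecessary() placement-news a PageAllocator into this storage,
  // and page_allocator() reinterpret_casts it back out.)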
+ union PageAllocatorStorage { + char memory[sizeof(PageAllocator)]; + uintptr_t extra; // To force alignment + }; + + static PageAllocatorStorage page_allocator_; + static PageMap pagemap_; +}; + +inline bool Static::IsInited() { + return inited_.load(std::memory_order_acquire); +} + +inline void Static::InitIfNecessary() { + if (ABSL_PREDICT_FALSE(!IsInited())) { + SlowInitIfNecessary(); + } +} + +// Why are these functions here? Because we want to inline them, but they +// need access to Static::span_allocator. Putting them in span.h would lead +// to nasty dependency loops. Since anything that needs them certainly +// includes static_vars.h, this is a perfectly good compromise. +// TODO(b/134687001): move span_allocator to Span, getting rid of the need for +// this. +inline Span* Span::New(PageID p, Length len) { + Span* result = Static::span_allocator()->New(); + result->Init(p, len); + return result; +} + +inline void Span::Delete(Span* span) { +#ifndef NDEBUG + // In debug mode, trash the contents of deleted Spans + memset(static_cast(span), 0x3f, sizeof(*span)); +#endif + Static::span_allocator()->Delete(span); +} + +} // namespace tcmalloc + +#endif // TCMALLOC_STATIC_VARS_H_ diff --git a/tcmalloc/stats.cc b/tcmalloc/stats.cc new file mode 100644 index 000000000..81ca66421 --- /dev/null +++ b/tcmalloc/stats.cc @@ -0,0 +1,617 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/stats.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/base/macros.h" +#include "absl/debugging/internal/vdso_support.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/bits.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +namespace tcmalloc { + +static double BytesToMiB(size_t bytes) { + const double MiB = 1048576.0; + return bytes / MiB; +} + +static double PagesToMiB(uint64_t pages) { + return BytesToMiB(pages * kPageSize); +} + +// For example, PrintRightAdjustedWithPrefix(out, ">=", 42, 6) prints " >=42". 
+static void PrintRightAdjustedWithPrefix(TCMalloc_Printer *out, + const char *prefix, int num, + int width) { + width -= strlen(prefix); + int num_tmp = num; + for (int i = 0; i < width - 1; i++) { + num_tmp /= 10; + if (num_tmp == 0) { + out->printf(" "); + } + } + out->printf("%s%d", prefix, num); +} + +void PrintStats(const char *label, TCMalloc_Printer *out, + const BackingStats &backing, const SmallSpanStats &small, + const LargeSpanStats &large, bool everything) { + size_t nonempty_sizes = 0; + for (int i = 0; i < kMaxPages; ++i) { + const size_t norm = small.normal_length[i]; + const size_t ret = small.returned_length[i]; + if (norm + ret > 0) nonempty_sizes++; + } + + out->printf("------------------------------------------------\n"); + out->printf("%s: %zu sizes; %6.1f MiB free; %6.1f MiB unmapped\n", label, + nonempty_sizes, BytesToMiB(backing.free_bytes), + BytesToMiB(backing.unmapped_bytes)); + out->printf("------------------------------------------------\n"); + + size_t cum_normal_pages = 0, cum_returned_pages = 0, cum_total_pages = 0; + if (!everything) return; + + for (size_t i = 0; i < kMaxPages; ++i) { + const size_t norm = small.normal_length[i]; + const size_t ret = small.returned_length[i]; + const size_t total = norm + ret; + if (total == 0) continue; + const size_t norm_pages = norm * i; + const size_t ret_pages = ret * i; + const size_t total_pages = norm_pages + ret_pages; + cum_normal_pages += norm_pages; + cum_returned_pages += ret_pages; + cum_total_pages += total_pages; + out->printf( + "%6zu pages * %6zu spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + i, total, PagesToMiB(total_pages), PagesToMiB(cum_total_pages), + PagesToMiB(ret_pages), PagesToMiB(cum_returned_pages)); + } + + cum_normal_pages += large.normal_pages; + cum_returned_pages += large.returned_pages; + const size_t large_total_pages = large.normal_pages + large.returned_pages; + cum_total_pages += large_total_pages; + PrintRightAdjustedWithPrefix(out, ">=", kMaxPages, 6); + out->printf( + " large * %6zu spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + static_cast(large.spans), PagesToMiB(large_total_pages), + PagesToMiB(cum_total_pages), PagesToMiB(large.returned_pages), + PagesToMiB(cum_returned_pages)); +} + +struct HistBucket { + uint64_t min_sec; + const char *label; +}; + +static const HistBucket kSpanAgeHistBuckets[] = { + // clang-format off + {0, "<1s"}, + {1, "1s"}, + {30, "30s"}, + {1 * 60, "1m"}, + {30 * 60, "30m"}, + {1 * 60 * 60, "1h"}, + {8 * 60 * 60, "8+h"}, + // clang-format on +}; + +struct PageHeapEntry { + int64_t span_size; // bytes + int64_t present; // bytes + int64_t released; // bytes + int64_t num_spans; + double avg_live_age_secs; + double avg_released_age_secs; + int64_t live_age_hist_bytes[PageAgeHistograms::kNumBuckets] = {0, 0, 0, 0, + 0, 0, 0}; + int64_t released_age_hist_bytes[PageAgeHistograms::kNumBuckets] = {0, 0, 0, 0, + 0, 0, 0}; + + void PrintInPbtxt(PbtxtRegion *parent, + absl::string_view sub_region_name) const; +}; + +void PageHeapEntry::PrintInPbtxt(PbtxtRegion *parent, + absl::string_view sub_region_name) const { + auto page_heap = parent->CreateSubRegion(sub_region_name); + page_heap.PrintI64("span_size", span_size); + page_heap.PrintI64("present", present); + page_heap.PrintI64("released", released); + page_heap.PrintI64("num_spans", num_spans); + page_heap.PrintDouble("avg_live_age_secs", avg_live_age_secs); + page_heap.PrintDouble("avg_released_age_secs", avg_released_age_secs); + + for 
(int j = 0; j < PageAgeHistograms::kNumBuckets; j++) { + uint64_t min_age_secs = kSpanAgeHistBuckets[j].min_sec; + uint64_t max_age_secs = j != PageAgeHistograms::kNumBuckets - 1 + ? kSpanAgeHistBuckets[j + 1].min_sec + : INT_MAX; + if (live_age_hist_bytes[j] != 0) { + auto live_age_hist = page_heap.CreateSubRegion("live_age_hist"); + live_age_hist.PrintI64("bytes", live_age_hist_bytes[j]); + live_age_hist.PrintI64("min_age_secs", min_age_secs); + live_age_hist.PrintI64("max_age_secs", max_age_secs); + } + if (released_age_hist_bytes[j] != 0) { + auto released_age_hist = page_heap.CreateSubRegion("released_age_hist"); + released_age_hist.PrintI64("bytes", released_age_hist_bytes[j]); + released_age_hist.PrintI64("min_age_secs", min_age_secs); + released_age_hist.PrintI64("max_age_secs", max_age_secs); + } + } +} + +void PrintStatsInPbtxt(PbtxtRegion *region, const SmallSpanStats &small, + const LargeSpanStats &large, + const PageAgeHistograms &ages) { + // Print for small pages. + for (size_t i = 0; i < kMaxPages; ++i) { + const size_t norm = small.normal_length[i]; + const size_t ret = small.returned_length[i]; + const size_t total = norm + ret; + if (total == 0) continue; + const size_t norm_pages = norm * i; + const size_t ret_pages = ret * i; + PageHeapEntry entry; + entry.span_size = i * kPageSize; + entry.present = norm_pages * kPageSize; + entry.released = ret_pages * kPageSize; + entry.num_spans = total; + + // Histogram is only collected for pages < ages.kNumSize. + if (i < PageAgeHistograms::kNumSizes) { + entry.avg_live_age_secs = + ages.GetSmallHistogram(/*released=*/false, i)->avg_age(); + entry.avg_released_age_secs = + ages.GetSmallHistogram(/*released=*/true, i)->avg_age(); + for (int j = 0; j < ages.kNumBuckets; j++) { + entry.live_age_hist_bytes[j] = + ages.GetSmallHistogram(/*released=*/false, i)->pages_in_bucket(j) * + kPageSize; + entry.released_age_hist_bytes[j] = + ages.GetSmallHistogram(/*released=*/true, i)->pages_in_bucket(j) * + kPageSize; + } + } + entry.PrintInPbtxt(region, "page_heap"); + } + + // Print for large page. 
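  // All spans of kMaxPages or more pages are aggregated into a single entry,
  // with span_size reported as -1 to mark the ">= kMaxPages" bucket.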
+ { + PageHeapEntry entry; + entry.span_size = -1; + entry.num_spans = large.spans; + entry.present = large.normal_pages * kPageSize; + entry.released = large.returned_pages * kPageSize; + entry.avg_live_age_secs = + ages.GetLargeHistogram(/*released=*/false)->avg_age(); + entry.avg_released_age_secs = + ages.GetLargeHistogram(/*released=*/true)->avg_age(); + for (int j = 0; j < ages.kNumBuckets; j++) { + entry.live_age_hist_bytes[j] = + ages.GetLargeHistogram(/*released=*/false)->pages_in_bucket(j) * + kPageSize; + entry.released_age_hist_bytes[j] = + ages.GetLargeHistogram(/*released=*/true)->pages_in_bucket(j) * + kPageSize; + } + entry.PrintInPbtxt(region, "page_heap"); + } + + region->PrintI64("min_large_span_size", kMaxPages); +} + +static int HistBucketIndex(double age_exact) { + uint64_t age_secs = age_exact; // truncate to seconds + for (int i = 0; i < ABSL_ARRAYSIZE(kSpanAgeHistBuckets) - 1; i++) { + if (age_secs < kSpanAgeHistBuckets[i + 1].min_sec) { + return i; + } + } + return ABSL_ARRAYSIZE(kSpanAgeHistBuckets) - 1; +} + +PageAgeHistograms::PageAgeHistograms(int64_t now) + : now_(now), freq_(absl::base_internal::CycleClock::Frequency()) { + static_assert( + PageAgeHistograms::kNumBuckets == ABSL_ARRAYSIZE(kSpanAgeHistBuckets), + "buckets don't match constant in header"); + + memset(&live_, 0, sizeof(live_)); + memset(&returned_, 0, sizeof(returned_)); +} + +void PageAgeHistograms::RecordRange(Length pages, bool released, int64_t when) { + double age = std::max(0.0, (now_ - when) / freq_); + (released ? returned_ : live_).Record(pages, age); +} + +void PageAgeHistograms::PerSizeHistograms::Record(Length pages, double age) { + (pages < kLargeSize ? GetSmall(pages) : GetLarge())->Record(pages, age); + total.Record(pages, age); +} + +static uint32_t SaturatingAdd(uint32_t x, uint32_t y) { + uint32_t z = x + y; + if (z < x) z = std::numeric_limits::max(); + return z; +} + +void PageAgeHistograms::Histogram::Record(Length pages, double age) { + size_t bucket = HistBucketIndex(age); + buckets_[bucket] = SaturatingAdd(buckets_[bucket], pages); + total_pages_ += pages; + total_age_ += pages * age; +} + +void PageAgeHistograms::Print(const char *label, TCMalloc_Printer *out) const { + out->printf("------------------------------------------------\n"); + out->printf( + "%s cache entry age (count of pages in spans of " + "a given size that have been idle for up to the given period of time)\n", + label); + out->printf("------------------------------------------------\n"); + out->printf(" "); + // Print out the table header. All columns have width 8 chars. + out->printf(" mean"); + for (int b = 0; b < kNumBuckets; b++) { + out->printf("%8s", kSpanAgeHistBuckets[b].label); + } + out->printf("\n"); + + live_.Print("Live span", out); + out->printf("\n"); + returned_.Print("Unmapped span", out); +} + +static void PrintLineHeader(TCMalloc_Printer *out, const char *kind, + const char *prefix, int num) { + // Print the beginning of the line, e.g. "Live span, >=128 pages: ". The + // span size ("128" in the example) is padded such that it plus the span + // prefix ("Live") plus the span size prefix (">=") is kHeaderExtraChars wide. 
+ const int kHeaderExtraChars = 19; + const int span_size_width = + std::max(0, kHeaderExtraChars - strlen(kind)); + out->printf("%s, ", kind); + PrintRightAdjustedWithPrefix(out, prefix, num, span_size_width); + out->printf(" pages: "); +} + +void PageAgeHistograms::PerSizeHistograms::Print(const char *kind, + TCMalloc_Printer *out) const { + out->printf("%-15s TOTAL PAGES: ", kind); + total.Print(out); + + for (Length l = 1; l < kNumSizes; ++l) { + const Histogram *here = &small[l - 1]; + if (here->empty()) continue; + PrintLineHeader(out, kind, "", l); + here->Print(out); + } + + if (!large.empty()) { + PrintLineHeader(out, kind, ">=", kNumSizes); + large.Print(out); + } +} + +void PageAgeHistograms::Histogram::Print(TCMalloc_Printer *out) const { + const double mean = avg_age(); + out->printf(" %7.1f", mean); + for (int b = 0; b < kNumBuckets; ++b) { + out->printf(" %7" PRIu32, buckets_[b]); + } + + out->printf("\n"); +} + +void PageAllocInfo::Print(TCMalloc_Printer *out) const { + int64_t ns = TimeNanos(); + double hz = (1000.0 * 1000 * 1000) / ns; + out->printf("%s: stats on allocation sizes\n", label_); + out->printf("%s: %zu pages live small allocation\n", label_, total_small_); + out->printf("%s: %zu pages of slack on large allocations\n", label_, + total_slack_); + out->printf("%s: largest seen allocation %zu pages\n", label_, largest_seen_); + out->printf("%s: per-size information:\n", label_); + + auto print_counts = [this, hz, out](const Counts &c, Length nmin, + Length nmax) { + const size_t a = c.nalloc; + const size_t f = c.nfree; + const size_t a_pages = c.alloc_size; + const size_t f_pages = c.free_size; + if (a == 0) return; + const size_t live = a - f; + const double live_mib = BytesToMiB((a_pages - f_pages) * kPageSize); + const double rate_hz = a * hz; + const double mib_hz = BytesToMiB(a_pages * kPageSize) * hz; + if (nmin == nmax) { + out->printf("%s: %21zu page info: ", label_, nmin); + } else { + out->printf("%s: [ %7zu , %7zu ] page info: ", label_, nmin, nmax); + } + out->printf( + "%10zu / %10zu a/f, %8zu (%6.1f MiB) live, " + "%8.3g allocs/s (%6.1f MiB/s)\n", + a, f, live, live_mib, rate_hz, mib_hz); + }; + + for (int i = 0; i < kMaxPages; ++i) { + const Length n = i + 1; + print_counts(small_[i], n, n); + } + + for (int i = 0; i < kAddressBits - kPageShift; ++i) { + const Length nmax = static_cast(1) << i; + const Length nmin = nmax / 2 + 1; + print_counts(large_[i], nmin, nmax); + } +} + +void PageAllocInfo::PrintInPbtxt(PbtxtRegion *region, + absl::string_view stat_name) const { + int64_t ns = TimeNanos(); + double hz = (1000.0 * 1000 * 1000) / ns; + region->PrintI64("num_small_allocation_pages", total_small_); + region->PrintI64("num_slack_pages", total_slack_); + region->PrintI64("largest_allocation_pages", largest_seen_); + + auto print_counts = [hz, region, &stat_name](const Counts &c, Length nmin, + Length nmax) { + const size_t a = c.nalloc; + const size_t f = c.nfree; + const size_t a_pages = c.alloc_size; + const size_t f_pages = c.free_size; + if (a == 0) return; + const int64_t live_bytes = (a_pages - f_pages) * kPageSize; + const double rate_hz = a * hz; + const double bytes_hz = static_cast(a_pages * kPageSize) * hz; + auto stat = region->CreateSubRegion(stat_name); + stat.PrintI64("min_span_pages", nmin); + stat.PrintI64("max_span_pages", nmax); + stat.PrintI64("num_spans_allocated", a); + stat.PrintI64("num_spans_freed", f); + stat.PrintI64("live_bytes", live_bytes); + stat.PrintDouble("spans_allocated_per_second", rate_hz); + 
stat.PrintI64("bytes_allocated_per_second", static_cast(bytes_hz)); + }; + + for (int i = 0; i < kMaxPages; ++i) { + const Length n = i + 1; + print_counts(small_[i], n, n); + } + + for (int i = 0; i < kAddressBits - kPageShift; ++i) { + const Length nmax = static_cast(1) << i; + const Length nmin = nmax / 2 + 1; + print_counts(large_[i], nmin, nmax); + } +} + +static size_t RoundUp(size_t value, size_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); +} + +void PageAllocInfo::RecordAlloc(PageID p, Length n) { + if (ABSL_PREDICT_FALSE(log_on())) { + int64_t t = TimeNanos(); + LogAlloc(t, p, n); + } + + static_assert(kMaxPages * kPageSize == 1024 * 1024, "threshold changed?"); + static_assert(kMaxPages < kPagesPerHugePage, "there should be slack"); + largest_seen_ = std::max(largest_seen_, n); + if (n <= kMaxPages) { + total_small_ += n; + small_[n - 1].Alloc(n); + } else { + Length slack = RoundUp(n, kPagesPerHugePage) - n; + total_slack_ += slack; + size_t i = tcmalloc_internal::Bits::Log2Ceiling(n); + large_[i].Alloc(n); + } +} + +void PageAllocInfo::RecordFree(PageID p, Length n) { + if (ABSL_PREDICT_FALSE(log_on())) { + int64_t t = TimeNanos(); + LogFree(t, p, n); + } + + if (n <= kMaxPages) { + total_small_ -= n; + small_[n - 1].Free(n); + } else { + Length slack = RoundUp(n, kPagesPerHugePage) - n; + total_slack_ -= slack; + size_t i = tcmalloc_internal::Bits::Log2Ceiling(n); + large_[i].Free(n); + } +} + +void PageAllocInfo::RecordRelease(Length n, Length got) { + if (ABSL_PREDICT_FALSE(log_on())) { + int64_t t = TimeNanos(); + LogRelease(t, n); + } +} + +const PageAllocInfo::Counts &PageAllocInfo::counts_for(Length n) const { + if (n <= kMaxPages) { + return small_[n - 1]; + } + size_t i = tcmalloc_internal::Bits::Log2Ceiling(n); + return large_[i]; +} + +// Our current format is really simple. We have an eight-byte version +// number as a header (currently = 1). We then follow up with a sequence +// of fixed-size events, each 16 bytes: +// - 8 byte "id" (really returned page) +// - 4 byte size (in kib, for compatibility) +// (this gets us to 4 TiB; anything larger is reported truncated) +// - 4 bytes for when (ms since last event) + what +// We shift up the when by 8 bits, and store what the event is in +// low 8 bits. (Currently just 0=alloc, 1=free, 2=Release.) +// This truncates time deltas to 2^24 ms ~= 4 hours. +// This could be compressed further. (As is, it compresses well +// with gzip.) +// All values are host-order. + +struct Entry { + uint64_t id; + uint32_t kib; + uint32_t whenwhat; +}; + +using tcmalloc::tcmalloc_internal::signal_safe_write; + +void PageAllocInfo::Write(uint64_t when, uint8_t what, PageID p, Length n) { + static_assert(sizeof(Entry) == 16, "bad sizing"); + Entry e; + // Round the time to ms *before* computing deltas, because this produces more + // accurate results in the long run. + + // Consider events that occur at absolute time 0.7ms and 50ms. If + // we take deltas first, we say the first event occurred at +0.7 = + // 0ms and the second event occurred at +49.3ms = 49ms. + // Rounding first produces 0 and 50. 
+ const uint64_t ms = when / 1000 / 1000; + uint64_t delta_ms = ms - last_ms_; + last_ms_ = ms; + // clamping + if (delta_ms >= 1 << 24) { + delta_ms = (1 << 24) - 1; + } + e.whenwhat = delta_ms << 8 | what; + e.id = p; + size_t bytes = (n << kPageShift); + static const size_t KiB = 1024; + static const size_t kMaxRep = std::numeric_limits::max() * KiB; + if (bytes > kMaxRep) { + bytes = kMaxRep; + } + e.kib = bytes / KiB; + const char *ptr = reinterpret_cast(&e); + const size_t len = sizeof(Entry); + CHECK_CONDITION(len == signal_safe_write(fd_, ptr, len, nullptr)); +} + +PageAllocInfo::PageAllocInfo(const char *label, int log_fd) + : label_(label), fd_(log_fd) { + if (ABSL_PREDICT_FALSE(log_on())) { + // version 1 of the format, in case we change things up + uint64_t header = 1; + const char *ptr = reinterpret_cast(&header); + const size_t len = sizeof(header); + CHECK_CONDITION(len == signal_safe_write(fd_, ptr, len, nullptr)); + } +} + +int64_t PageAllocInfo::TimeNanos() const { + return GetCurrentTimeNanos() - baseline_ns_; +} + +// Why does this exist? Why not just use absl::GetCurrentTimeNanos? +// Failing that, why not just use clock_gettime? See b/65384231, but +// essentially because we can't work around people LD_PRELOADing a +// broken and unsafe clock_gettime. Since the real implementation is +// actually a VDSO function, we just go straight to there, which LD_PRELOAD +// can't interfere with. +// +// Now, of course, we can't guarantee this VDSO approach will work--we +// may be on some strange system without one, or one with a newer +// version of the symbols and no interpolating shim. But we can +// gracefully fail back to the "real" clock_gettime. Will it work if +// someone is doing something weird? Who knows, but it's no worse than +// any other option. +typedef int (*ClockGettimePointer)(clockid_t clk_id, struct timespec *tp); + +const ClockGettimePointer GetRealClock() { +#if ABSL_HAVE_ELF_MEM_IMAGE + absl::debugging_internal::VDSOSupport vdso; + absl::debugging_internal::VDSOSupport::SymbolInfo info; + // The VDSO contents aren't very consistent, so we make our best + // guesses. Each of these named and versioned symbols should be + // equivalent to just calling clock_gettime if they exist. + + // Expected on x86_64 + if (vdso.LookupSymbol("__vdso_clock_gettime", "LINUX_2.6", + absl::debugging_internal::VDSOSupport::kVDSOSymbolType, + &info)) { + return reinterpret_cast( + const_cast(info.address)); + } + + // Expected on Power + if (vdso.LookupSymbol("__kernel_clock_gettime", "LINUX_2.6.15", + absl::debugging_internal::VDSOSupport::kVDSOSymbolType, + &info)) { + return reinterpret_cast( + const_cast(info.address)); + } + // Expected on arm64 + if (vdso.LookupSymbol("__kernel_clock_gettime", "LINUX_2.6.39", + absl::debugging_internal::VDSOSupport::kVDSOSymbolType, + &info)) { + return reinterpret_cast( + const_cast(info.address)); + } +#endif + + // Hopefully this is good enough. + return &clock_gettime; +} + +int64_t GetCurrentTimeNanos() { + static const ClockGettimePointer p = GetRealClock(); + struct timespec ts; + int ret = p(CLOCK_MONOTONIC, &ts); + CHECK_CONDITION(ret == 0); + + // If we are here rather than failing from the CHECK_CONDITION, gettime (via + // p) succeeded. Since we used an unusual calling technique (directly into + // the VDSO), sanitizers cannot see that this memory has been initialized. 
+ ANNOTATE_MEMORY_IS_INITIALIZED(&ts.tv_sec, sizeof(ts.tv_sec)); + ANNOTATE_MEMORY_IS_INITIALIZED(&ts.tv_nsec, sizeof(ts.tv_nsec)); + + int64_t s = ts.tv_sec; + int64_t ns = ts.tv_nsec; + ns += s * 1000 * 1000 * 1000; + + return ns; +} + +} // namespace tcmalloc diff --git a/tcmalloc/stats.h b/tcmalloc/stats.h new file mode 100644 index 000000000..d873a421d --- /dev/null +++ b/tcmalloc/stats.h @@ -0,0 +1,263 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_STATS_H_ +#define TCMALLOC_STATS_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { + +// Return nanoseconds since some unspecified fixed point. +// TODO(b/65384231): use absl::GetCurrentTimeNanos +int64_t GetCurrentTimeNanos(); + +struct BackingStats { + BackingStats() : system_bytes(0), free_bytes(0), unmapped_bytes(0) {} + uint64_t system_bytes; // Total bytes allocated from system + uint64_t free_bytes; // Total bytes on normal freelists + uint64_t unmapped_bytes; // Total bytes on returned freelists + + BackingStats &operator+=(BackingStats rhs) { + system_bytes += rhs.system_bytes; + free_bytes += rhs.free_bytes; + unmapped_bytes += rhs.unmapped_bytes; + return *this; + } +}; + +inline BackingStats operator+(BackingStats lhs, BackingStats rhs) { + return lhs += rhs; +} + +struct SmallSpanStats { + // For each free list of small spans, the length (in spans) of the + // normal and returned free lists for that size. + int64_t normal_length[kMaxPages]; + int64_t returned_length[kMaxPages]; + + SmallSpanStats &operator+=(SmallSpanStats rhs) { + for (size_t i = 0; i < kMaxPages; ++i) { + normal_length[i] += rhs.normal_length[i]; + returned_length[i] += rhs.returned_length[i]; + } + return *this; + } +}; + +inline SmallSpanStats operator+(SmallSpanStats lhs, SmallSpanStats rhs) { + return lhs += rhs; +} + +// Stats for free large spans (i.e., spans with more than kMaxPages pages). +struct LargeSpanStats { + int64_t spans; // Number of such spans + int64_t normal_pages; // Combined page length of normal large spans + int64_t returned_pages; // Combined page length of unmapped spans + + LargeSpanStats &operator+=(LargeSpanStats rhs) { + spans += rhs.spans; + normal_pages += rhs.normal_pages; + returned_pages += rhs.returned_pages; + return *this; + } +}; + +inline LargeSpanStats operator+(LargeSpanStats lhs, LargeSpanStats rhs) { + return lhs += rhs; +} + +void PrintStats(const char *label, TCMalloc_Printer *out, + const BackingStats &backing, const SmallSpanStats &small, + const LargeSpanStats &large, bool everything); + +class PageAgeHistograms { + public: + // assumed to be taken from absl::base_internal::CycleClock::Now (done + // like this for tests) + explicit PageAgeHistograms(int64_t now); + + // = absl::base_internal::CycleClock::Now() when the span was last + // changed. 
+ void RecordRange(Length pages, bool released, int64_t when); + + void Print(const char *label, TCMalloc_Printer *out) const; + + static constexpr size_t kNumBuckets = 7; + static constexpr size_t kNumSizes = 64; + + static constexpr size_t kLargeSize = kNumSizes; + class Histogram { + public: + void Record(Length pages, double age); + void Print(TCMalloc_Printer *out) const; + + uint32_t pages_in_bucket(size_t i) const { return buckets_[i]; } + + uint32_t total() const { return total_pages_; } + + double avg_age() const { return empty() ? 0.0 : total_age_ / total_pages_; } + + bool empty() const { return total_pages_ == 0; } + + private: + // total number of pages fitting in this bucket We are actually + // somewhat space constrained so it's important to _not_ use a + // 64-bit counter here. This comfortably supports terabytes of + // RAM, and just in case we will update this with saturating arithmetic. + uint32_t buckets_[kNumBuckets]; + + uint64_t total_pages_; + double total_age_; + }; + + const Histogram *GetSmallHistogram(bool released, Length n) const { + if (released) { + return returned_.GetSmall(n); + } else { + return live_.GetSmall(n); + } + } + + const Histogram *GetLargeHistogram(bool released) const { + if (released) { + return returned_.GetLarge(); + } else { + return live_.GetLarge(); + } + } + + const Histogram *GetTotalHistogram(bool released) { + if (released) { + return returned_.GetTotal(); + } else { + return live_.GetTotal(); + } + } + + private: + struct PerSizeHistograms { + void Record(Length pages, double age); + void Print(const char *kind, TCMalloc_Printer *out) const; + + Histogram *GetSmall(Length n) { + CHECK_CONDITION(n < kNumSizes); + return &small[n - 1]; + } + const Histogram *GetSmall(Length n) const { + CHECK_CONDITION(n < kNumSizes); + return &small[n - 1]; + } + + Histogram *GetLarge() { return &large; } + const Histogram *GetLarge() const { return &large; } + + Histogram *GetTotal() { return &total; } + + Histogram small[kNumSizes - 1]; + Histogram large; + Histogram total; + }; + + const int64_t now_; + const double freq_; + + PerSizeHistograms live_; + PerSizeHistograms returned_; +}; + +void PrintStatsInPbtxt(PbtxtRegion *region, const SmallSpanStats &small, + const LargeSpanStats &large, + const PageAgeHistograms &ages); + +class PageAllocInfo { + private: + struct Counts; + + public: + // If log_fd >= 0, dump a page trace to it as record events come in. + PageAllocInfo(const char *label, int log_fd); + + // Subclasses are responsible for calling these methods when + // the relevant actions occur + void RecordAlloc(PageID p, Length n); + void RecordFree(PageID p, Length n); + void RecordRelease(Length n, Length got); + // And invoking this in their Print() implementation. + void Print(TCMalloc_Printer *out) const; + void PrintInPbtxt(PbtxtRegion *region, absl::string_view stat_name) const; + + // Total size of allocations < 1 MiB + Length small() const { return total_small_; } + // We define the "slack" of an allocation as the difference + // between its size and the nearest hugepage multiple (i.e. how + // much would go unused if we allocated it as an aligned hugepage + // and didn't use the rest.) + // Return the total slack of all non-small allocations. + Length slack() const { return total_slack_; } + + const Counts &counts_for(Length n) const; + + // Returns (approximate) nanoseconds since class instantiation. 
+ int64_t TimeNanos() const; + + private: + Length total_small_{0}; + Length total_slack_{0}; + + Length largest_seen_{0}; + + // How many alloc/frees have we seen (of some size range?) + struct Counts { + // raw counts + size_t nalloc{0}, nfree{0}; + // and total sizes (needed if this struct tracks a nontrivial range + Length alloc_size{0}, free_size{0}; + + void Alloc(Length n) { + nalloc++; + alloc_size += n; + } + void Free(Length n) { + nfree++; + free_size += n; + } + }; + + // Indexed by exact length + Counts small_[kMaxPages]; + // Indexed by power-of-two-buckets + Counts large_[kAddressBits - kPageShift]; + const char *label_; + + int64_t baseline_ns_{GetCurrentTimeNanos()}; + + // State for page trace logging. + const int fd_; + uint64_t last_ms_{0}; + void Write(uint64_t when, uint8_t what, PageID p, Length n); + bool log_on() const { return fd_ >= 0; } + void LogAlloc(int64_t when, PageID p, Length n) { Write(when, 0, p, n); } + void LogFree(int64_t when, PageID p, Length n) { Write(when, 1, p, n); } + void LogRelease(int64_t when, Length n) { Write(when, 2, 0, n); } +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_STATS_H_ diff --git a/tcmalloc/stats_test.cc b/tcmalloc/stats_test.cc new file mode 100644 index 000000000..70e8f2a3e --- /dev/null +++ b/tcmalloc/stats_test.cc @@ -0,0 +1,264 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
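// Illustrative sketch, not part of the original sources: how one record of
// the page trace written by PageAllocInfo (when log_fd >= 0) can be decoded.
// The format is described in stats.cc above: an 8-byte version header
// (currently 1) followed by fixed-size, host-order entries.
//
//   struct TraceEntry {      // mirrors `Entry` in stats.cc
//     uint64_t id;           // the PageID
//     uint32_t kib;          // size in KiB (truncated above ~4 TiB)
//     uint32_t whenwhat;     // (delta in ms since previous event) << 8 | what
//   };
//
//   // what: 0 = alloc, 1 = free, 2 = release
//   const uint64_t delta_ms = e.whenwhat >> 8;
//   const uint8_t what = e.whenwhat & 0xff;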
+ +#include "tcmalloc/stats.h" + +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace tcmalloc { +namespace { + +class PrintTest : public ::testing::Test { + protected: + static constexpr size_t kBufferSize = 256 * 1024; + char buf_[kBufferSize]; + + void ExpectStats(const BackingStats &back, const SmallSpanStats &small, + const LargeSpanStats &large, const std::string &expected) { + TCMalloc_Printer out(&buf_[0], kBufferSize); + tcmalloc::PrintStats("PrintTest", &out, back, small, large, true); + EXPECT_EQ(expected, buf_); + } + + BackingStats Backing(size_t system, size_t free, size_t unmapped) { + BackingStats stat; + stat.system_bytes = system; + stat.free_bytes = free; + stat.unmapped_bytes = unmapped; + + return stat; + } +}; + +TEST_F(PrintTest, Empty) { + ExpectStats(Backing(0, 0, 0), {{}, {}}, // small + {0, 0, 0}, // large + // clang-format off +R"LIT(------------------------------------------------ +PrintTest: 0 sizes; 0.0 MiB free; 0.0 MiB unmapped +------------------------------------------------ + >=128 large * 0 spans ~ 0.0 MiB; 0.0 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum +)LIT"); + // clang-format on +} + +TEST_F(PrintTest, ManySizes) { + ExpectStats(Backing(987654321, 1900 * 1000, 67 * 1000 * 1000), + {{0, 100, 0, 250, 0, 0, 0, 0, 0, 51}, + {0, 0, 300, 400, 0, 0, 0, 0, 0, 27}}, // small + {2, 100000, 2000}, // large + // clang-format off +R"LIT(------------------------------------------------ +PrintTest: 4 sizes; 1.8 MiB free; 63.9 MiB unmapped +------------------------------------------------ + 1 pages * 100 spans ~ 0.8 MiB; 0.8 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum + 2 pages * 300 spans ~ 4.7 MiB; 5.5 MiB cum; unmapped: 4.7 MiB; 4.7 MiB cum + 3 pages * 650 spans ~ 15.2 MiB; 20.7 MiB cum; unmapped: 9.4 MiB; 14.1 MiB cum + 9 pages * 78 spans ~ 5.5 MiB; 26.2 MiB cum; unmapped: 1.9 MiB; 16.0 MiB cum + >=128 large * 2 spans ~ 796.9 MiB; 823.1 MiB cum; unmapped: 15.6 MiB; 31.6 MiB cum +)LIT"); + // clang-format on +} + +class AgeTest : public testing::Test { + protected: + static constexpr size_t kBufferSize = 256 * 1024; + char buf_[kBufferSize]; + + static const int64_t kNow = 1000ll * 1000 * 1000 * 1000; + + // correct "when" value to compute age as + int64_t WhenForAge(double age) { + static double freq = absl::base_internal::CycleClock::Frequency(); + // age = (now - when) / freq + return kNow - freq * age; + } + + void ExpectAges(const tcmalloc::PageAgeHistograms &ages, + const std::string &expected) { + TCMalloc_Printer out(&buf_[0], kBufferSize); + ages.Print("AgeTest", &out); + std::string got = buf_; + EXPECT_EQ(expected, got); + } +}; + +TEST_F(AgeTest, Basic) { + tcmalloc::PageAgeHistograms ages(kNow); + ages.RecordRange(1, false, WhenForAge(0.5)); + ages.RecordRange(1, false, WhenForAge(1.2)); + ages.RecordRange(1, false, WhenForAge(3.7)); + + ages.RecordRange(3, false, WhenForAge(60 * 60 * 10)); + + for (int i = 0; i < 10; ++i) { + ages.RecordRange(2, true, WhenForAge(0.1)); + } + ages.RecordRange(2, true, WhenForAge(10 * 60 + 5)); + + ages.RecordRange(200, true, WhenForAge(10 * 60)); + // clang-format off + const char kExpected[] = +R"LIT(------------------------------------------------ +AgeTest cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time) +------------------------------------------------ + mean <1s 1s 30s 1m 30m 1h 8+h +Live span TOTAL PAGES: 18000.9 1 2 0 0 0 0 3 +Live span, 1 
pages: 1.8 1 2 0 0 0 0 0 +Live span, 3 pages: 36000.0 0 0 0 0 0 0 3 + +Unmapped span TOTAL PAGES: 546.0 20 0 0 202 0 0 0 +Unmapped span, 2 pages: 55.1 20 0 0 2 0 0 0 +Unmapped span, >=64 pages: 600.0 0 0 0 200 0 0 0 +)LIT"; + // clang-format on + ExpectAges(ages, kExpected); +} + +TEST_F(AgeTest, Overflow) { + tcmalloc::PageAgeHistograms ages(kNow); + const uint32_t too_big = 4 * (std::numeric_limits::max() / 5); + ages.RecordRange(too_big, false, WhenForAge(0.5)); + ages.RecordRange(too_big, false, WhenForAge(0.5)); + + // clang-format off + const char kExpected[] = +R"LIT(------------------------------------------------ +AgeTest cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time) +------------------------------------------------ + mean <1s 1s 30s 1m 30m 1h 8+h +Live span TOTAL PAGES: 0.5 4294967295 0 0 0 0 0 0 +Live span, >=64 pages: 0.5 4294967295 0 0 0 0 0 0 + +Unmapped span TOTAL PAGES: 0.0 0 0 0 0 0 0 0 +)LIT"; + // clang-format on + ExpectAges(ages, kExpected); +} + +TEST_F(AgeTest, ManySizes) { + tcmalloc::PageAgeHistograms ages(kNow); + const Length N = tcmalloc::PageAgeHistograms::kLargeSize; + for (Length i = 1; i <= N; ++i) { + ages.RecordRange(i, false, WhenForAge(i * 3)); + } + + for (Length i = 1; i < N; ++i) { + auto hist = ages.GetSmallHistogram(false, i); + EXPECT_EQ(i, hist->total()); + EXPECT_FLOAT_EQ(i * 3, hist->avg_age()); + } + + auto large = ages.GetLargeHistogram(false); + EXPECT_EQ(N, large->total()); + EXPECT_FLOAT_EQ(N * 3, large->avg_age()); + + auto total = ages.GetTotalHistogram(false); + // sum_{i = 1}^N i = n(n+1)/2 + EXPECT_EQ(N * (N + 1) / 2, total->total()); + // sum_{i = 1}^N 3 * i * i = n(n + 1)(2n + 1) / 2; + // divide by the above page total gives (2n+1) + EXPECT_FLOAT_EQ(2 * N + 1, total->avg_age()); +} + +TEST(PageAllocInfo, Small) { + PageAllocInfo info("", -1); + static_assert(kMaxPages >= 4, "odd config"); + + info.RecordAlloc(0, 2); + info.RecordAlloc(0, 2); + info.RecordAlloc(0, 2); + + info.RecordAlloc(0, 3); + info.RecordAlloc(0, 3); + + info.RecordFree(0, 3); + + auto c2 = info.counts_for(2); + EXPECT_EQ(3, c2.nalloc); + EXPECT_EQ(0, c2.nfree); + EXPECT_EQ(6, c2.alloc_size); + EXPECT_EQ(0, c2.free_size); + + auto c3 = info.counts_for(3); + EXPECT_EQ(2, c3.nalloc); + EXPECT_EQ(1, c3.nfree); + EXPECT_EQ(6, c3.alloc_size); + EXPECT_EQ(3, c3.free_size); + + EXPECT_EQ(3 * 2 + (2 - 1) * 3, info.small()); + EXPECT_EQ(0, info.slack()); +} + +TEST(PageAllocInfo, Large) { + PageAllocInfo info("", -1); + static_assert(kPagesPerHugePage > kMaxPages, "odd config"); + + // These three should be aggregated + Length slack = 0; + info.RecordAlloc(0, kMaxPages + 1); + slack += kPagesPerHugePage - kMaxPages - 1; + info.RecordAlloc(0, kMaxPages * 3 / 2); + slack += kPagesPerHugePage - kMaxPages * 3 / 2; + info.RecordAlloc(0, kMaxPages * 2); + slack += kPagesPerHugePage - kMaxPages * 2; + + // This shouldn't + const Length larger = kMaxPages * 2 + 1; + info.RecordAlloc(0, larger); + slack += + (kPagesPerHugePage - (larger % kPagesPerHugePage)) % kPagesPerHugePage; + + auto c1 = info.counts_for(kMaxPages + 1); + EXPECT_EQ(3, c1.nalloc); + EXPECT_EQ(0, c1.nfree); + EXPECT_EQ(kMaxPages * 9 / 2 + 1, c1.alloc_size); + EXPECT_EQ(0, c1.free_size); + + auto c2 = info.counts_for(kMaxPages * 2 + 1); + EXPECT_EQ(1, c2.nalloc); + EXPECT_EQ(0, c2.nfree); + EXPECT_EQ(kMaxPages * 2 + 1, c2.alloc_size); + EXPECT_EQ(0, c2.free_size); + + EXPECT_EQ(0, info.small()); + EXPECT_EQ(slack, info.slack()); +} + 
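// A worked example of the slack arithmetic exercised in the Large test above
// (the concrete constants are illustrative assumptions, not taken from
// common.h). With 8 KiB pages and 2 MiB hugepages, kPagesPerHugePage == 256
// and kMaxPages == 128, so for the first recorded allocation, as computed in
// PageAllocInfo::RecordAlloc:
//
//   Length n = kMaxPages + 1;                          // 129 pages
//   Length slack = RoundUp(n, kPagesPerHugePage) - n;  // 256 - 129 == 127
//
// i.e. rounding the request up to an aligned hugepage run leaves 127 of the
// hugepage's 256 pages unused, and that difference is what info.slack()
// accumulates.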
+TEST(ClockTest, ClockTicks) { + // It's a bit ironic to test this clock against other clocks since + // this exists because we don't trust other clocks. But hopefully + // no one is using libfaketime on this binary, and of course we + // don't care about signal safety, just ticking. + const absl::Time before = absl::Now(); + const int64_t b = tcmalloc::GetCurrentTimeNanos(); + static const absl::Duration kDur = absl::Milliseconds(500); + absl::SleepFor(kDur); + const int64_t a = tcmalloc::GetCurrentTimeNanos(); + const absl::Time after = absl::Now(); + + const absl::Duration actual = (after - before); + const absl::Duration measured = absl::Nanoseconds(a - b); + EXPECT_LE(actual * 0.99, measured) << actual; + EXPECT_GE(actual * 1.01, measured) << actual; +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/system-alloc.cc b/tcmalloc/system-alloc.cc new file mode 100644 index 000000000..1c757845b --- /dev/null +++ b/tcmalloc/system-alloc.cc @@ -0,0 +1,528 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/system-alloc.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/sampler.h" + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// Solaris has a bug where it doesn't declare madvise() for C++. +// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0 +#if defined(__sun) && defined(__SVR4) +#include +extern "C" int madvise(caddr_t, size_t, int); +#endif + +namespace tcmalloc { + +namespace { + +// Check that no bit is set at position ADDRESS_BITS or higher. +template +void CheckAddressBits(uintptr_t ptr) { + ASSERT((ptr >> ADDRESS_BITS) == 0); +} + +// Specialize for the bit width of a pointer to avoid undefined shift. +template <> +ABSL_ATTRIBUTE_UNUSED void CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) {} + +static_assert(kAddressBits <= 8 * sizeof(void*), + "kAddressBits must be smaller than the pointer size"); + +// Structure for discovering alignment +union MemoryAligner { + void* p; + double d; + size_t s; +} ABSL_CACHELINE_ALIGNED; + +static_assert(sizeof(MemoryAligner) < kMinSystemAlloc, + "hugepage alignment too small"); + +absl::base_internal::SpinLock spinlock(absl::base_internal::kLinkerInitialized); + +// Page size is initialized on demand +size_t pagesize = 0; +size_t preferred_alignment = 0; + +// The current region factory. +AddressRegionFactory* region_factory = nullptr; + +// Rounds size down to a multiple of alignment. 
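// For example, with a power-of-two alignment of 4096:
//   RoundDown(5120, 4096) == 4096, while RoundUp(5120, 4096) == 8192.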
+size_t RoundDown(const size_t size, const size_t alignment) { + // Checks that the alignment has only one bit set. + ASSERT(alignment != 0 && (alignment & (alignment - 1)) == 0); + return (size) & ~(alignment - 1); +} + +// Rounds size up to a multiple of alignment. +size_t RoundUp(const size_t size, const size_t alignment) { + return RoundDown(size + alignment - 1, alignment); +} + +// Rounds size up to the nearest power of 2. +// Requires: size <= (SIZE_MAX / 2) + 1. +size_t RoundUpPowerOf2(size_t size) { + for (size_t i = 0; i < sizeof(size_t) * 8; ++i) { + size_t pow2 = size_t{1} << i; + if (pow2 >= size) return pow2; + } + CHECK_CONDITION(false && "size too big to round up"); + return 0; +} + +class MmapRegion : public AddressRegion { + public: + MmapRegion(uintptr_t start, size_t size) : start_(start), free_size_(size) {} + std::pair Alloc(size_t size, size_t alignment) override; + + private: + const uintptr_t start_; + size_t free_size_; +}; + +class MmapRegionFactory : public AddressRegionFactory { + public: + AddressRegion* Create(void* start, size_t size, UsageHint hint) override; + size_t GetStats(absl::Span buffer) override; + size_t GetStatsInPbtxt(absl::Span buffer) override; + + private: + std::atomic bytes_reserved_{0}; +}; +std::aligned_storage::type mmap_space; + +class RegionManager { + public: + std::pair Alloc(size_t size, size_t alignment, bool tagged); + + void DiscardMappedRegions() { + untagged_region_ = nullptr; + tagged_region_ = nullptr; + } + + private: + // Checks that there is sufficient space available in the reserved region + // for the next allocation, if not allocate a new region. + // Then returns a pointer to the new memory. + std::pair Allocate(size_t size, size_t alignment, bool tagged); + + AddressRegion* untagged_region_{nullptr}; + AddressRegion* tagged_region_{nullptr}; +}; +std::aligned_storage::type + region_manager_space; +RegionManager* region_manager = nullptr; + +std::pair MmapRegion::Alloc(size_t request_size, + size_t alignment) { + // Align on kMinSystemAlloc boundaries to reduce external fragmentation for + // future allocations. + size_t size = RoundUp(request_size, kMinSystemAlloc); + if (size < request_size) return {nullptr, 0}; + alignment = std::max(alignment, preferred_alignment); + + // Tries to allocate size bytes from the end of [start_, start_ + free_size_), + // aligned to alignment. + uintptr_t end = start_ + free_size_; + uintptr_t result = end - size; + if (result > end) return {nullptr, 0}; // Underflow. + result &= ~(alignment - 1); + if (result < start_) return {nullptr, 0}; // Out of memory in region. 
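  // (Worked example with assumed values, not part of this diff: with
  // start_ == 0x400000, free_size_ == 0x346000 and a 2 MiB request at 2 MiB
  // alignment, and assuming kMinSystemAlloc and preferred_alignment do not
  // round these up further, end == 0x746000 and result starts at 0x546000;
  // the mask ~(alignment - 1) rounds it down to 0x400000, which is still
  // >= start_, so the entire tail [0x400000, 0x746000) is handed out below
  // and free_size_ drops to zero.)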
+ size_t actual_size = end - result; + + ASSERT(result % pagesize == 0); + void* result_ptr = reinterpret_cast(result); + if (mprotect(result_ptr, actual_size, PROT_READ | PROT_WRITE) != 0) { + Log(kLogWithStack, __FILE__, __LINE__, + "mprotect() region failed (ptr, size, error)", result_ptr, actual_size, + strerror(errno)); + return {nullptr, 0}; + } + free_size_ -= actual_size; + return {result_ptr, actual_size}; +} + +AddressRegion* MmapRegionFactory::Create(void* start, size_t size, + UsageHint hint) { + void* region_space = MallocInternal(sizeof(MmapRegion)); + if (!region_space) return nullptr; + bytes_reserved_.fetch_add(size, std::memory_order_relaxed); + return new (region_space) + MmapRegion(reinterpret_cast(start), size); +} + +size_t MmapRegionFactory::GetStats(absl::Span buffer) { + TCMalloc_Printer printer(buffer.data(), buffer.size()); + long long allocated = bytes_reserved_.load(std::memory_order_relaxed); + constexpr double MiB = 1048576.0; + printer.printf("MmapSysAllocator: %lld bytes (%.1f MiB) reserved\n", + allocated, allocated / MiB); + + size_t required = printer.SpaceRequired(); + // SpaceRequired includes the null terminator. + if (required > 0) { + required--; + } + + return required; +} + +size_t MmapRegionFactory::GetStatsInPbtxt(absl::Span buffer) { + TCMalloc_Printer printer(buffer.data(), buffer.size()); + long long allocated = bytes_reserved_.load(std::memory_order_relaxed); + printer.printf("mmap_sys_allocator: %lld\n", allocated); + + size_t required = printer.SpaceRequired(); + // SpaceRequired includes the null terminator. + if (required > 0) { + required--; + } + + return required; +} + +std::pair RegionManager::Alloc(size_t request_size, + size_t alignment, bool tagged) { + // We do not support size or alignment larger than kTagMask. + // TODO(b/141325493): Handle these large allocations. + if (request_size > kTagMask || alignment > kTagMask) return {nullptr, 0}; + + // If we are dealing with large sizes, or large alignments we do not + // want to throw away the existing reserved region, so instead we + // return a new region specifically targeted for the request. + if (request_size > kMinMmapAlloc || alignment > kMinMmapAlloc) { + // Align on kMinSystemAlloc boundaries to reduce external fragmentation for + // future allocations. + size_t size = RoundUp(request_size, kMinSystemAlloc); + if (size < request_size) return {nullptr, 0}; + alignment = std::max(alignment, preferred_alignment); + void* ptr = MmapAligned(size, alignment, tagged); + if (!ptr) return {nullptr, 0}; + auto region_type = tagged ? AddressRegionFactory::UsageHint::kInfrequent + : AddressRegionFactory::UsageHint::kNormal; + AddressRegion* region = region_factory->Create(ptr, size, region_type); + if (!region) { + munmap(ptr, size); + return {nullptr, 0}; + } + std::pair result = region->Alloc(size, alignment); + if (result.first != nullptr) { + ASSERT(result.first == ptr); + ASSERT(result.second == size); + } else { + ASSERT(result.second == 0); + } + return result; + } + return Allocate(request_size, alignment, tagged); +} + +std::pair RegionManager::Allocate(size_t size, size_t alignment, + bool tagged) { + AddressRegion*& region = tagged ? tagged_region_ : untagged_region_; + // For sizes that fit in our reserved range first of all check if we can + // satisfy the request from what we have available. + if (region) { + std::pair result = region->Alloc(size, alignment); + if (result.first) return result; + } + + // Allocation failed so we need to reserve more memory. 
+ // Reserve new region and try allocation again. + void* ptr = MmapAligned(kMinMmapAlloc, kMinMmapAlloc, tagged); + if (!ptr) return {nullptr, 0}; + auto region_type = tagged ? AddressRegionFactory::UsageHint::kInfrequent + : AddressRegionFactory::UsageHint::kNormal; + region = region_factory->Create(ptr, kMinMmapAlloc, region_type); + if (!region) { + munmap(ptr, kMinMmapAlloc); + return {nullptr, 0}; + } + return region->Alloc(size, alignment); +} + +void InitSystemAllocatorIfNecessary() { + if (region_factory) return; + pagesize = getpagesize(); + // Sets the preferred alignment to be the largest of either the alignment + // returned by mmap() or our minimum allocation size. The minimum allocation + // size is usually a multiple of page size, but this need not be true for + // SMALL_BUT_SLOW where we do not allocate in units of huge pages. + preferred_alignment = std::max(pagesize, kMinSystemAlloc); + region_manager = new (®ion_manager_space) RegionManager(); + region_factory = new (&mmap_space) MmapRegionFactory(); +} + +ABSL_CONST_INIT std::atomic system_release_errors = ATOMIC_VAR_INIT(0); + +} // namespace + +void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, + bool tagged) { + // If default alignment is set request the minimum alignment provided by + // the system. + alignment = std::max(alignment, pagesize); + + // Discard requests that overflow + if (bytes + alignment < bytes) return nullptr; + + // This may return significantly more memory than "bytes" by default, so + // require callers to know the true amount allocated. + ASSERT(actual_bytes != nullptr); + + absl::base_internal::SpinLockHolder lock_holder(&spinlock); + + InitSystemAllocatorIfNecessary(); + + void* result = nullptr; + std::tie(result, *actual_bytes) = + region_manager->Alloc(bytes, alignment, tagged); + + if (result != nullptr) { + CheckAddressBits(reinterpret_cast(result) + + *actual_bytes - 1); + ASSERT(tcmalloc::IsTaggedMemory(result) == tagged); + } + return result; +} + +static bool ReleasePages(void* start, size_t length) { + int ret; + // Note -- ignoring most return codes, because if this fails it + // doesn't matter... + // Moreover, MADV_REMOVE *will* fail (with EINVAL) on anonymous memory, + // but that's harmless. +#ifdef MADV_REMOVE + // MADV_REMOVE deletes any backing storage for non-anonymous memory + // (tmpfs). + do { + ret = madvise(start, length, MADV_REMOVE); + } while (ret == -1 && errno == EAGAIN); + + if (ret == 0) { + return true; + } +#endif +#ifdef MADV_DONTNEED + // MADV_DONTNEED drops page table info and any anonymous pages. 
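  // (Aside, not part of this diff: for the private anonymous mappings TCMalloc
  // creates, a successful MADV_DONTNEED means the next touch of these pages
  // faults in fresh zero-filled pages, which is why SystemBack() below can
  // "restore" a released range simply by writing to each page.)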
+ do { + ret = madvise(start, length, MADV_DONTNEED); + } while (ret == -1 && errno == EAGAIN); + + if (ret == 0) { + return true; + } +#endif + + return false; +} + +int SystemReleaseErrors() { + return system_release_errors.load(std::memory_order_relaxed); +} + +void SystemRelease(void* start, size_t length) { + int saved_errno = errno; +#if defined(MADV_DONTNEED) || defined(MADV_REMOVE) + const size_t pagemask = pagesize - 1; + + size_t new_start = reinterpret_cast(start); + size_t end = new_start + length; + size_t new_end = end; + + // Round up the starting address and round down the ending address + // to be page aligned: + new_start = (new_start + pagesize - 1) & ~pagemask; + new_end = new_end & ~pagemask; + + ASSERT((new_start & pagemask) == 0); + ASSERT((new_end & pagemask) == 0); + ASSERT(new_start >= reinterpret_cast(start)); + ASSERT(new_end <= end); + + if (new_end > new_start) { + void* new_ptr = reinterpret_cast(new_start); + size_t new_length = new_end - new_start; + + if (!ReleasePages(new_ptr, new_length)) { + // Try unlocking. + int ret; + do { + ret = munlock(reinterpret_cast(new_start), new_end - new_start); + } while (ret == -1 && errno == EAGAIN); + + if (ret != 0 || !ReleasePages(new_ptr, new_length)) { + // If we fail to munlock *or* fail our second attempt at madvise, + // increment our failure count. + system_release_errors.fetch_add(1, std::memory_order_relaxed); + } + } + } +#endif + errno = saved_errno; +} + +void SystemBack(void* start, size_t length) { + // TODO(b/134694141): use madvise when we have better support for that; + // taking faults is not free. + + // TODO(b/134694141): enable this, if we can avoid causing trouble for apps + // that routinely make large mallocs they never touch (sigh). + return; + + // Strictly speaking, not everything uses 4K pages. However, we're + // not asking the OS for anything actually page-related, just taking + // a fault on every "page". If the real page size is bigger, we do + // a few extra reads; this is not worth worrying about. + static const size_t kHardwarePageSize = 4 * 1024; + CHECK_CONDITION(reinterpret_cast(start) % kHardwarePageSize == 0); + CHECK_CONDITION(length % kHardwarePageSize == 0); + const size_t num_pages = length / kHardwarePageSize; + + struct PageStruct { + volatile size_t data[kHardwarePageSize / sizeof(size_t)]; + }; + CHECK_CONDITION(sizeof(PageStruct) == kHardwarePageSize); + + PageStruct* ps = reinterpret_cast(start); + PageStruct* limit = ps + num_pages; + for (; ps < limit; ++ps) { + ps->data[0] = 0; + } +} + +AddressRegionFactory* GetRegionFactory() { + absl::base_internal::SpinLockHolder lock_holder(&spinlock); + InitSystemAllocatorIfNecessary(); + return region_factory; +} + +void SetRegionFactory(AddressRegionFactory* factory) { + absl::base_internal::SpinLockHolder lock_holder(&spinlock); + InitSystemAllocatorIfNecessary(); + region_manager->DiscardMappedRegions(); + region_factory = factory; +} + +static uintptr_t RandomMmapHint(size_t size, size_t alignment, bool tagged) { + // Rely on kernel's mmap randomization to seed our RNG. 
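  // (Illustrative note, not part of this diff: the seed below is just the
  // ASLR-chosen address of a throwaway PROT_NONE mapping, advanced through
  // Sampler::NextRandom for each subsequent hint. With kAddressBits == 48,
  // for example, kAddrMask keeps non-sanitizer hints below 2^46, i.e. in the
  // bottom quarter of the usable address space.)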
+ static uintptr_t rnd = []() { + void* seed = + mmap(nullptr, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (seed == MAP_FAILED) { + Log(kCrash, __FILE__, __LINE__, + "Initial mmap() reservation failed (size)", kPageSize); + } + munmap(seed, kPageSize); + return reinterpret_cast(seed); + }(); + + // Mask out bits that cannot be used by the hardware, mask out the top + // "usable" bit since it is reserved for kernel use, and also mask out the + // next top bit to significantly reduce collisions with mappings that tend to + // be placed in the upper half of the address space (e.g., stack, executable, + // kernel-placed mmaps). See b/139357826. +#if defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) + // MSan and TSan use up all of the lower address space, so we allow use of + // mid-upper address space when they're active. This only matters for + // TCMalloc-internal tests, since sanitizers install their own malloc/free. + constexpr uintptr_t kAddrMask = (uintptr_t{3} << (kAddressBits - 3)) - 1; +#else + constexpr uintptr_t kAddrMask = (uintptr_t{1} << (kAddressBits - 2)) - 1; +#endif + + // Ensure alignment >= size so we're guaranteed the full mapping has the same + // tag. + alignment = RoundUpPowerOf2(std::max(alignment, size)); + + rnd = Sampler::NextRandom(rnd); + uintptr_t addr = rnd & kAddrMask & ~(alignment - 1) & ~kTagMask; + if (!tagged) { + addr |= kTagMask; + } + return addr; +} + +void* MmapAligned(size_t size, size_t alignment, bool tagged) { + ASSERT(size <= kTagMask); + ASSERT(alignment <= kTagMask); + + static uintptr_t next_untagged_addr = 0; + static uintptr_t next_tagged_addr = 0; + + uintptr_t& next_addr = tagged ? next_tagged_addr : next_untagged_addr; + if (!next_addr || next_addr & (alignment - 1) || + IsTaggedMemory(reinterpret_cast(next_addr)) != tagged || + IsTaggedMemory(reinterpret_cast(next_addr + size - 1)) != tagged) { + next_addr = RandomMmapHint(size, alignment, tagged); + } + for (int i = 0; i < 1000; ++i) { + void* hint = reinterpret_cast(next_addr); + // TODO(b/140190055): Use MAP_FIXED_NOREPLACE once available. + void* result = + mmap(hint, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (result == hint) { + // Attempt to keep the next mmap contiguous in the common case. + next_addr += size; + CHECK_CONDITION(kAddressBits == std::numeric_limits::digits || + next_addr <= uintptr_t{1} << kAddressBits); + return result; + } + if (result == MAP_FAILED) { + Log(kLogWithStack, __FILE__, __LINE__, + "mmap() reservation failed (hint, size, error)", hint, size, + strerror(errno)); + return nullptr; + } + if (int err = munmap(result, size)) { + Log(kLogWithStack, __FILE__, __LINE__, "munmap() failed"); + ASSERT(err == 0); + } + next_addr = RandomMmapHint(size, alignment, tagged); + } + + Log(kLogWithStack, __FILE__, __LINE__, + "MmapAligned() failed (size, alignment)", size, alignment); + return nullptr; +} + +} // namespace tcmalloc diff --git a/tcmalloc/system-alloc.h b/tcmalloc/system-alloc.h new file mode 100644 index 000000000..998732968 --- /dev/null +++ b/tcmalloc/system-alloc.h @@ -0,0 +1,85 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Routine that uses sbrk/mmap to allocate memory from the system. +// Useful for implementing malloc. + +#ifndef TCMALLOC_SYSTEM_ALLOC_H_ +#define TCMALLOC_SYSTEM_ALLOC_H_ + +#include + +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/span.h" + +namespace tcmalloc { + +// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment +// REQUIRES: "alignment" and "size" <= kTagMask +// +// Allocate and return "bytes" of zeroed memory. The allocator may optionally +// return more bytes than asked for (i.e. return an entire "huge" page). The +// length of the returned memory area is stored in *actual_bytes. +// +// The returned pointer is a multiple of "alignment" if non-zero. The +// returned pointer will always be aligned suitably for holding a +// void*, double, or size_t. In addition, if this platform defines +// ABSL_CACHELINE_ALIGNED, the return pointer will always be cacheline +// aligned. +// +// The returned pointer is guaranteed to satisfy IsTagged(ptr) == "tagged". +// +// Returns nullptr when out of memory. +void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment, + bool tagged); + +// Returns the number of times we failed to give pages back to the OS after a +// call to SystemRelease. +int SystemReleaseErrors(); + +// This call is a hint to the operating system that the pages +// contained in the specified range of memory will not be used for a +// while, and can be released for use by other processes or the OS. +// Pages which are released in this way may be destroyed (zeroed) by +// the OS. The benefit of this function is that it frees memory for +// use by the system, the cost is that the pages are faulted back into +// the address space next time they are touched, which can impact +// performance. (Only pages fully covered by the memory region will +// be released, partial pages will not.) +void SystemRelease(void *start, size_t length); + +// This call is the inverse of SystemRelease: the pages in this range +// are in use and should be faulted in. (In principle this is a +// best-effort hint, but in practice we will unconditionally fault the +// range.) +// REQUIRES: [start, start + length) is a range aligned to 4KiB boundaries. +void SystemBack(void *start, size_t length); + +// Returns the current address region factory. +AddressRegionFactory *GetRegionFactory(); + +// Sets the current address region factory to factory. +void SetRegionFactory(AddressRegionFactory *factory); + +// Reserves using mmap() a region of memory of the requested size and alignment, +// with the bits specified by kTagMask set to 0 if tagged is true and 1 +// otherwise. 
+// +// REQUIRES: pagesize <= alignment <= kTagMask +// REQUIRES: size <= kTagMask +void *MmapAligned(size_t size, size_t alignment, bool tagged); + +} // namespace tcmalloc + +#endif // TCMALLOC_SYSTEM_ALLOC_H_ diff --git a/tcmalloc/system-alloc_unittest.cc b/tcmalloc/system-alloc_unittest.cc new file mode 100644 index 000000000..0f3551534 --- /dev/null +++ b/tcmalloc/system-alloc_unittest.cc @@ -0,0 +1,136 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/system-alloc.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +class MmapAlignedTest : public testing::TestWithParam { + protected: + void MmapAndCheck(size_t size, size_t alignment) { + for (bool tagged : {true, false}) { + void* p = tcmalloc::MmapAligned(size, alignment, tagged); + EXPECT_NE(p, nullptr); + EXPECT_EQ(reinterpret_cast(p) % alignment, 0); + EXPECT_EQ(tcmalloc::IsTaggedMemory(p), tagged); + EXPECT_EQ(tcmalloc::IsTaggedMemory(static_cast(p) + size - 1), + tagged); + EXPECT_EQ(munmap(p, size), 0); + } + } +}; +INSTANTIATE_TEST_SUITE_P(VariedAlignment, MmapAlignedTest, + testing::Values(kPageSize, tcmalloc::kMinSystemAlloc, + tcmalloc::kMinMmapAlloc, + tcmalloc::kTagMask)); + +TEST_P(MmapAlignedTest, CorrectAlignmentAndTag) { + MmapAndCheck(tcmalloc::kMinSystemAlloc, GetParam()); +} + +// Ensure mmap sizes near kTagMask still have the correct tag at the beginning +// and end of the mapping. +TEST_F(MmapAlignedTest, LargeSizeSmallAlignment) { + MmapAndCheck(tcmalloc::kTagMask, kPageSize); +} + +// Was SimpleRegion::Alloc invoked at least once? 
+static bool simple_region_alloc_invoked = false; + +class SimpleRegion : public AddressRegion { + public: + SimpleRegion(uintptr_t start, size_t size) + : start_(start), free_size_(size) {} + + std::pair Alloc(size_t size, size_t alignment) override { + simple_region_alloc_invoked = true; + uintptr_t result = (start_ + free_size_ - size) & ~(alignment - 1); + if (result < start_ || result >= start_ + free_size_) return {nullptr, 0}; + size_t actual_size = start_ + free_size_ - result; + free_size_ -= actual_size; + void* ptr = reinterpret_cast(result); + int err = mprotect(ptr, actual_size, PROT_READ | PROT_WRITE); + CHECK_CONDITION(err == 0); + return {ptr, actual_size}; + } + + private: + uintptr_t start_; + size_t free_size_; +}; + +class SimpleRegionFactory : public AddressRegionFactory { + public: + AddressRegion* Create(void* start, size_t size, UsageHint hint) override { + void* region_space = MallocInternal(sizeof(SimpleRegion)); + CHECK_CONDITION(region_space != nullptr); + return new (region_space) + SimpleRegion(reinterpret_cast(start), size); + } +}; +SimpleRegionFactory f; + +TEST(Basic, InvokedTest) { + MallocExtension::SetRegionFactory(&f); + + // An allocation size that is likely to trigger the system allocator. + void* p = malloc(tcmalloc::kMinSystemAlloc); + free(p); + + // Make sure that our allocator was invoked. + ASSERT_TRUE(simple_region_alloc_invoked); +} + +TEST(Basic, RetryFailTest) { + // Check with the allocator still works after a failed allocation. + // + // There is no way to call malloc and guarantee it will fail. malloc takes a + // size_t parameter and the C++ standard does not constrain the size of + // size_t. For example, consider an implementation where size_t is 32 bits + // and pointers are 64 bits. + // + // It is likely, though, that sizeof(size_t) == sizeof(void*). In that case, + // the first allocation here might succeed but the second allocation must + // fail. + // + // If the second allocation succeeds, you will have to rewrite or + // disable this test. + const size_t kHugeSize = std::numeric_limits::max() / 2; + void* p1 = malloc(kHugeSize); + void* p2 = malloc(kHugeSize); + ASSERT_EQ(p2, nullptr); + if (p1 != nullptr) free(p1); + + void* q = malloc(1024); + ASSERT_NE(q, nullptr); + free(q); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/tcmalloc.cc b/tcmalloc/tcmalloc.cc new file mode 100644 index 000000000..6a6355255 --- /dev/null +++ b/tcmalloc/tcmalloc.cc @@ -0,0 +1,2298 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SYNCHRONIZATION +// 1. The thread-/cpu-specific lists are accessed without acquiring any locks. +// This is safe because each such list is only accessed by one thread/cpu at +// a time. +// 2. We have a lock per central free-list, and hold it while manipulating +// the central free list for a particular size. +// 3. The central page allocator is protected by "pageheap_lock". +// 4. 
The pagemap (which maps from page-number to descriptor), +// can be read without holding any locks, and written while holding +// the "pageheap_lock". +// +// This multi-threaded access to the pagemap is safe for fairly +// subtle reasons. We basically assume that when an object X is +// allocated by thread A and deallocated by thread B, there must +// have been appropriate synchronization in the handoff of object +// X from thread A to thread B. +// +// PAGEMAP +// ------- +// Page map contains a mapping from page id to Span. +// +// If Span s occupies pages [p..q], +// pagemap[p] == s +// pagemap[q] == s +// pagemap[p+1..q-1] are undefined +// pagemap[p-1] and pagemap[q+1] are defined: +// NULL if the corresponding page is not yet in the address space. +// Otherwise it points to a Span. This span may be free +// or allocated. If free, it is in one of pageheap's freelist. + +#include "tcmalloc/tcmalloc.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/debugging/stacktrace.h" +#include "absl/memory/memory.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/strip.h" +#include "tcmalloc/common.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/memory_stats.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/page_allocator.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" +#include "tcmalloc/tcmalloc_policy.h" +#include "tcmalloc/thread_cache.h" +#include "tcmalloc/tracking.h" +#include "tcmalloc/transfer_cache.h" + +#if defined(OS_FREEBSD) || defined(OS_MACOSX) +#undef HAVE_STRUCT_MALLINFO +#else +#include +#define HAVE_STRUCT_MALLINFO +#endif + +using tcmalloc::AllocatorStats; +using tcmalloc::CppPolicy; +using tcmalloc::kCrash; +using tcmalloc::kCrashWithStats; +using tcmalloc::kLog; +using tcmalloc::kLogWithStack; +using tcmalloc::Log; +using tcmalloc::MallocPolicy; +using tcmalloc::pageheap_lock; +using tcmalloc::Sampler; +using tcmalloc::Span; +using tcmalloc::StackTrace; +using tcmalloc::StackTraceTable; +using tcmalloc::Static; +using tcmalloc::ThreadCache; + +// ----------------------- IMPLEMENTATION ------------------------------- + +// Extract interesting stats +struct TCMallocStats { + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + uint64_t per_cpu_bytes; // Bytes in per-CPU cache + uint64_t pagemap_root_bytes_res; // Resident bytes of 
pagemap root node + uint64_t percpu_metadata_bytes_res; // Resident bytes of the per-CPU metadata + AllocatorStats tc_stats; // ThreadCache objects + AllocatorStats span_stats; // Span objects + AllocatorStats stack_stats; // StackTrace objects + AllocatorStats bucket_stats; // StackTraceTable::Bucket objects + size_t pagemap_bytes; // included in metadata bytes + size_t percpu_metadata_bytes; // included in metadata bytes + tcmalloc::BackingStats pageheap; // Stats from page heap +}; + +// Get stats into "r". Also, if class_count != NULL, class_count[k] +// will be set to the total number of objects of size class k in the +// central cache, transfer cache, and per-thread and per-CPU caches. +// If small_spans is non-NULL, it is filled. Same for large_spans. +// The boolean report_residence determines whether residence information +// should be captured or not. Residence info requires a potentially +// costly OS call, and is not necessary in all situations. +static void ExtractStats(TCMallocStats* r, uint64_t* class_count, + tcmalloc::SmallSpanStats* small_spans, + tcmalloc::LargeSpanStats* large_spans, + bool report_residence) { + r->central_bytes = 0; + r->transfer_bytes = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + const size_t length = Static::transfer_cache()[cl].central_length(); + const size_t tc_length = Static::transfer_cache()[cl].tc_length(); + const size_t cache_overhead = Static::transfer_cache()[cl].OverheadBytes(); + const size_t size = Static::sizemap()->class_to_size(cl); + r->central_bytes += (size * length) + cache_overhead; + r->transfer_bytes += (size * tc_length); + if (class_count) { + // Sum the lengths of all per-class freelists, except the per-thread + // freelists, which get counted when we call GetThreadStats(), below. + class_count[cl] = length + tc_length; + if (tcmalloc::UsePerCpuCache()) { + class_count[cl] += Static::cpu_cache()->TotalObjectsOfClass(cl); + } + } + } + + // Add stats from per-thread heaps + r->thread_bytes = 0; + { // scope + absl::base_internal::SpinLockHolder h(&pageheap_lock); + ThreadCache::GetThreadStats(&r->thread_bytes, class_count); + r->tc_stats = ThreadCache::HeapStats(); + r->span_stats = Static::span_allocator()->stats(); + r->stack_stats = Static::stacktrace_allocator()->stats(); + r->bucket_stats = Static::bucket_allocator()->stats(); + r->metadata_bytes = Static::metadata_bytes(); + r->pagemap_bytes = Static::pagemap()->bytes(); + r->pageheap = Static::page_allocator()->stats(); + if (small_spans != nullptr) { + Static::page_allocator()->GetSmallSpanStats(small_spans); + } + if (large_spans != nullptr) { + Static::page_allocator()->GetLargeSpanStats(large_spans); + } + } + // We can access the pagemap without holding the pageheap_lock since it + // is static data, and we are only taking address and size which are + // constants. 
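  // (Worked example with assumed numbers, not part of this diff: if
  // metadata_bytes is 40 MiB, of which the pagemap accounts for 32 MiB of
  // virtual space but only 4 MiB is resident, the adjustment below reports
  // 40 - 32 + 4 = 12 MiB, i.e. the pagemap's virtual size is swapped out for
  // its resident footprint.)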
+ if (report_residence) { + auto resident_bytes = Static::pagemap_residence(); + r->pagemap_root_bytes_res = resident_bytes; + ASSERT(r->metadata_bytes >= r->pagemap_bytes); + r->metadata_bytes = r->metadata_bytes - r->pagemap_bytes + resident_bytes; + } else { + r->pagemap_root_bytes_res = 0; + } + + r->per_cpu_bytes = 0; + r->percpu_metadata_bytes_res = 0; + r->percpu_metadata_bytes = 0; + if (tcmalloc::UsePerCpuCache()) { + r->per_cpu_bytes = Static::cpu_cache()->TotalUsedBytes(); + + if (report_residence) { + auto percpu_metadata = Static::cpu_cache()->MetadataMemoryUsage(); + r->percpu_metadata_bytes_res = percpu_metadata.resident_size; + r->percpu_metadata_bytes = percpu_metadata.virtual_size; + + ASSERT(r->metadata_bytes >= r->percpu_metadata_bytes); + r->metadata_bytes = r->metadata_bytes - r->percpu_metadata_bytes + + r->percpu_metadata_bytes_res; + } + } +} + +// Because different fields of stats are computed from state protected +// by different locks, they may be inconsistent. Prevent underflow +// when subtracting to avoid gigantic results. +static uint64_t StatSub(uint64_t a, uint64_t b) { + return (a >= b) ? (a - b) : 0; +} + +// Return approximate number of bytes in use by app. +static uint64_t InUseByApp(const TCMallocStats& stats) { + return StatSub(stats.pageheap.system_bytes, + stats.thread_bytes + + stats.central_bytes + + stats.transfer_bytes + + stats.per_cpu_bytes + + stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); +} + +static uint64_t VirtualMemoryUsed(const TCMallocStats& stats) { + return stats.pageheap.system_bytes + stats.metadata_bytes; +} + +static uint64_t PhysicalMemoryUsed(const TCMallocStats& stats) { + return StatSub(VirtualMemoryUsed(stats), stats.pageheap.unmapped_bytes); +} + +// The number of bytes either in use by the app or fragmented so that +// it cannot be (arbitrarily) reused. +static uint64_t RequiredBytes(const TCMallocStats& stats) { + return StatSub(PhysicalMemoryUsed(stats), stats.pageheap.free_bytes); +} + +// WRITE stats to "out" +static void DumpStats(TCMalloc_Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + if (level >= 2) { + ExtractStats(&stats, class_count, nullptr, nullptr, true); + } else { + ExtractStats(&stats, nullptr, nullptr, nullptr, true); + } + + static const double MiB = 1048576.0; + + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + +#ifdef TCMALLOC_SMALL_BUT_SLOW + out->printf( + "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); +#endif + // clang-format off + // Avoid clang-format complaining about the way that this text is laid out. 
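  // (Illustrative summary, not part of this diff, of the accounting identity
  // behind the report below:
  //   physical = in_use_by_app + page_heap_free + central + per_cpu
  //              + transfer + thread + metadata
  //   virtual  = physical + unmapped
  // which is how the "Actual memory used" and "Virtual address space used"
  // subtotals are built up.)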
+ out->printf( + "------------------------------------------------\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in per-CPU cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" + "MALLOC:\n" + "MALLOC: %12" PRIu64 " Spans in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Spans created\n" + "MALLOC: %12" PRIu64 " Thread heaps in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Thread heaps created\n" + "MALLOC: %12" PRIu64 " Stack traces in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Stack traces created\n" + "MALLOC: %12" PRIu64 " Table buckets in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Table buckets created\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Pagemap bytes used\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Pagemap root resident bytes\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab bytes used\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab resident bytes\n" + "MALLOC: %12" PRIu64 " Tcmalloc page size\n" + "MALLOC: %12" PRIu64 " Tcmalloc hugepage size\n", + bytes_in_use_by_app, bytes_in_use_by_app / MiB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, + stats.central_bytes, stats.central_bytes / MiB, + stats.per_cpu_bytes, stats.per_cpu_bytes / MiB, + stats.transfer_bytes, stats.transfer_bytes / MiB, + stats.thread_bytes, stats.thread_bytes / MiB, + stats.metadata_bytes, stats.metadata_bytes / MiB, + physical_memory_used, physical_memory_used / MiB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, + virtual_memory_used, virtual_memory_used / MiB, + uint64_t(stats.span_stats.in_use), + uint64_t(stats.span_stats.total), + (stats.span_stats.total * sizeof(Span)) / MiB, + uint64_t(stats.tc_stats.in_use), + uint64_t(stats.tc_stats.total), + (stats.tc_stats.total * sizeof(ThreadCache)) / MiB, + uint64_t(stats.stack_stats.in_use), + uint64_t(stats.stack_stats.total), + (stats.stack_stats.total * sizeof(StackTrace)) / MiB, + uint64_t(stats.bucket_stats.in_use), + uint64_t(stats.bucket_stats.total), + (stats.bucket_stats.total * sizeof(StackTraceTable::Bucket)) / MiB, + uint64_t(stats.pagemap_bytes), + stats.pagemap_bytes / MiB, + stats.pagemap_root_bytes_res, stats.pagemap_root_bytes_res / MiB, + uint64_t(stats.percpu_metadata_bytes), + stats.percpu_metadata_bytes / MiB, + stats.percpu_metadata_bytes_res, stats.percpu_metadata_bytes_res / MiB, + uint64_t(kPageSize), + uint64_t(tcmalloc::kHugePageSize)); + // clang-format on + + tcmalloc::PrintExperiments(out); + + tcmalloc::tcmalloc_internal::MemoryStats memstats; + if (tcmalloc::tcmalloc_internal::GetMemoryStats(&memstats)) { + uint64_t rss = memstats.rss; + uint64_t vss = memstats.vss; + // clang-format off + out->printf( + "\n" + "Total process stats (inclusive of non-malloc sources):\n" + "TOTAL: %12" PRIu64 " (%7.1f MiB) Bytes resident (physical memory used)\n" + "TOTAL: %12" PRIu64 " (%7.1f MiB) Bytes mapped (virtual memory used)\n", + rss, 
rss / MiB, vss, vss / MiB); + // clang-format on + } + out->printf( + "------------------------------------------------\n" + "Call ReleaseMemoryToSystem() to release freelist memory to the OS" + " (via madvise()).\n" + "Bytes released to the OS take up virtual address space" + " but no physical memory.\n"); + if (level >= 2) { + out->printf("------------------------------------------------\n"); + out->printf("Total size of freelists for per-thread and per-CPU caches,\n"); + out->printf("transfer cache, and central cache, by size class\n"); + out->printf("------------------------------------------------\n"); + uint64_t cumulative = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + if (class_count[cl] > 0) { + uint64_t class_bytes = + class_count[cl] * Static::sizemap()->class_to_size(cl); + cumulative += class_bytes; + out->printf( + "class %3d [ %8zu bytes ] : " + "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n", + cl, Static::sizemap()->class_to_size(cl), class_count[cl], + class_bytes / MiB, cumulative / MiB); + } + } + + if (tcmalloc::UsePerCpuCache()) { + out->printf("------------------------------------------------\n"); + out->printf( + "Bytes in per-CPU caches (per cpu limit: %" PRIu64 " bytes)\n", + Static::cpu_cache()->CacheLimit()); + out->printf("------------------------------------------------\n"); + + cpu_set_t allowed_cpus; + if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) { + CPU_ZERO(&allowed_cpus); + } + + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); + cpu < num_cpus; ++cpu) { + uint64_t rbytes = Static::cpu_cache()->UsedBytes(cpu); + bool populated = Static::cpu_cache()->HasPopulated(cpu); + uint64_t unallocated = Static::cpu_cache()->Unallocated(cpu); + out->printf("cpu %3d: %12" PRIu64 + " bytes (%7.1f MiB) with" + "%12" PRIu64 " bytes unallocated %s%s\n", + cpu, rbytes, rbytes / MiB, unallocated, + CPU_ISSET(cpu, &allowed_cpus) ? " active" : "", + populated ? " populated" : ""); + } + } + + Static::page_allocator()->Print(out, /*tagged=*/false); + Static::page_allocator()->Print(out, /*tagged=*/true); + tcmalloc::tracking::Print(out); + Static::guardedpage_allocator()->Print(out); + + uint64_t limit_bytes; + bool is_hard; + std::tie(limit_bytes, is_hard) = Static::page_allocator()->limit(); + out->printf("PARAMETER desired_usage_limit_bytes %" PRIu64 " %s\n", + limit_bytes, is_hard ? "(hard)" : ""); + long long limit_hits = Static::page_allocator()->limit_hits(); + out->printf("Number of times limit was hit: %lld\n", limit_hits); + + out->printf("PARAMETER tcmalloc_per_cpu_caches %d\n", + tcmalloc::Parameters::per_cpu_caches() ? 
1 : 0); + out->printf("PARAMETER tcmalloc_max_per_cpu_cache_size %d\n", + tcmalloc::Parameters::max_per_cpu_cache_size()); + const long long thread_cache_max = + tcmalloc::Parameters::max_total_thread_cache_bytes(); + out->printf("PARAMETER tcmalloc_max_total_thread_cache_bytes %lld\n", + thread_cache_max); + } +} + +namespace { + +/*static*/ void DumpStatsInPbtxt(TCMalloc_Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + if (level >= 2) { + ExtractStats(&stats, class_count, nullptr, nullptr, true); + } else { + ExtractStats(&stats, nullptr, nullptr, nullptr, true); + } + + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + + PbtxtRegion region(out, kTop, /*indent=*/0); + region.PrintI64("in_use_by_app", bytes_in_use_by_app); + region.PrintI64("page_heap_freelist", stats.pageheap.free_bytes); + region.PrintI64("central_cache_freelist", stats.central_bytes); + region.PrintI64("per_cpu_cache_freelist", stats.per_cpu_bytes); + region.PrintI64("transfer_cache_freelist", stats.transfer_bytes); + region.PrintI64("thread_cache_freelists", stats.thread_bytes); + region.PrintI64("malloc_metadata", stats.metadata_bytes); + region.PrintI64("actual_mem_used", physical_memory_used); + region.PrintI64("unmapped", stats.pageheap.unmapped_bytes); + region.PrintI64("virtual_address_space_used", virtual_memory_used); + region.PrintI64("num_spans", uint64_t(stats.span_stats.in_use)); + region.PrintI64("num_spans_created", uint64_t(stats.span_stats.total)); + region.PrintI64("num_thread_heaps", uint64_t(stats.tc_stats.in_use)); + region.PrintI64("num_thread_heaps_created", uint64_t(stats.tc_stats.total)); + region.PrintI64("num_stack_traces", uint64_t(stats.stack_stats.in_use)); + region.PrintI64("num_stack_traces_created", + uint64_t(stats.stack_stats.total)); + region.PrintI64("num_table_buckets", uint64_t(stats.bucket_stats.in_use)); + region.PrintI64("num_table_buckets_created", + uint64_t(stats.bucket_stats.total)); + region.PrintI64("pagemap_size", uint64_t(stats.pagemap_bytes)); + region.PrintI64("pagemap_root_residence", stats.pagemap_root_bytes_res); + region.PrintI64("percpu_slab_size", stats.percpu_metadata_bytes); + region.PrintI64("percpu_slab_residence", stats.percpu_metadata_bytes_res); + region.PrintI64("tcmalloc_page_size", uint64_t(kPageSize)); + region.PrintI64("tcmalloc_huge_page_size", uint64_t(tcmalloc::kHugePageSize)); + + // Print total process stats (inclusive of non-malloc sources). 
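  // (Illustrative sketch, not part of this diff; the exact rendering depends
  // on PbtxtRegion, but the fields emitted here are expected to come out as
  // protobuf text format along the lines of:
  //   in_use_by_app: 123456789
  //   page_heap_freelist: 1048576
  //   freelist { sizeclass: 32 bytes: 65536 }
  //   cpu_cache { cpu: 0 used: 196608 unused: 65536 active: true }
  // with the nested entries produced by CreateSubRegion() below.)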
+ tcmalloc::tcmalloc_internal::MemoryStats memstats; + if (GetMemoryStats(&memstats)) { + region.PrintI64("total_resident", uint64_t(memstats.rss)); + region.PrintI64("total_mapped", uint64_t(memstats.vss)); + } + + if (level >= 2) { + { + for (int cl = 0; cl < kNumClasses; ++cl) { + if (class_count[cl] > 0) { + uint64_t class_bytes = + class_count[cl] * Static::sizemap()->class_to_size(cl); + PbtxtRegion entry = region.CreateSubRegion("freelist"); + entry.PrintI64("sizeclass", Static::sizemap()->class_to_size(cl)); + entry.PrintI64("bytes", class_bytes); + } + } + } + + if (tcmalloc::UsePerCpuCache()) { + cpu_set_t allowed_cpus; + if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) { + CPU_ZERO(&allowed_cpus); + } + + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); + cpu < num_cpus; ++cpu) { + PbtxtRegion entry = region.CreateSubRegion("cpu_cache"); + uint64_t rbytes = Static::cpu_cache()->UsedBytes(cpu); + bool populated = Static::cpu_cache()->HasPopulated(cpu); + uint64_t unallocated = Static::cpu_cache()->Unallocated(cpu); + entry.PrintI64("cpu", uint64_t(cpu)); + entry.PrintI64("used", rbytes); + entry.PrintI64("unused", unallocated); + entry.PrintBool("active", CPU_ISSET(cpu, &allowed_cpus)); + entry.PrintBool("populated", populated); + } + } + } + Static::page_allocator()->PrintInPbtxt(®ion, /*tagged=*/false); + Static::page_allocator()->PrintInPbtxt(®ion, /*tagged=*/true); + // We do not collect tracking information in pbtxt. + + size_t limit_bytes; + bool is_hard; + std::tie(limit_bytes, is_hard) = Static::page_allocator()->limit(); + region.PrintI64("desired_usage_limit_bytes", limit_bytes); + region.PrintBool("hard_limit", is_hard); + region.PrintI64("limit_hits", Static::page_allocator()->limit_hits()); + + { + auto gwp_asan = region.CreateSubRegion("gwp_asan"); + Static::guardedpage_allocator()->PrintInPbtxt(&gwp_asan); + } + + region.PrintI64("memory_release_failures", tcmalloc::SystemReleaseErrors()); + + region.PrintBool("tcmalloc_per_cpu_caches", + tcmalloc::Parameters::per_cpu_caches()); + region.PrintI64("tcmalloc_max_per_cpu_cache_size", + tcmalloc::Parameters::max_per_cpu_cache_size()); + region.PrintI64("tcmalloc_max_total_thread_cache_bytes", + tcmalloc::Parameters::max_total_thread_cache_bytes()); +} + +} // namespace + +// Gets a human readable description of the current state of the malloc data +// structures. A part of the state is stored in pbtxt format in `buffer`, the +// rest of the state is stored in the old format (the same as in +// MallocExtension::GetStats) in `other_buffer`. Both buffers are +// null-terminated strings in a prefix of "buffer[0,buffer_length-1]" or +// "other_buffer[0,other_buffer_length-1]". Returns the actual written sizes for +// buffer and other_buffer. +// +// REQUIRES: buffer_length > 0 and other_buffer_length > 0. +// +// TODO(b/130249686): This is NOT YET ready to use. +extern "C" ABSL_ATTRIBUTE_UNUSED int MallocExtension_Internal_GetStatsInPbtxt( + char* buffer, int buffer_length) { + ASSERT(buffer_length > 0); + TCMalloc_Printer printer(buffer, buffer_length); + + // Print level one stats unless lots of space is available + if (buffer_length < 10000) { + DumpStatsInPbtxt(&printer, 1); + } else { + DumpStatsInPbtxt(&printer, 2); + } + + size_t required = printer.SpaceRequired(); + // SpaceRequired includes the null terminator. 
+  if (required > 0) {
+    required--;
+  }
+
+  if (buffer_length > required) {
+    absl::base_internal::SpinLockHolder h(&pageheap_lock);
+    required += tcmalloc::GetRegionFactory()->GetStatsInPbtxt(
+        absl::Span<char>(buffer + required, buffer_length - required));
+  }
+
+  return required;
+}
+
+static void PrintStats(int level) {
+  const int kBufferSize = (TCMALLOC_HAVE_TRACKING ? 2 << 20 : 64 << 10);
+  char* buffer = new char[kBufferSize];
+  TCMalloc_Printer printer(buffer, kBufferSize);
+  DumpStats(&printer, level);
+  (void)write(STDERR_FILENO, buffer, strlen(buffer));
+  delete[] buffer;
+}
+
+// This function computes a profile that maps a live stack trace to
+// the number of bytes of central-cache memory pinned by an allocation
+// at that stack trace.
+static std::unique_ptr<const tcmalloc::tcmalloc_internal::ProfileBase>
+DumpFragmentationProfile() {
+  auto profile = absl::make_unique<StackTraceTable>(
+      tcmalloc::ProfileType::kFragmentation, 1, true, true);
+
+  {
+    absl::base_internal::SpinLockHolder h(&pageheap_lock);
+    for (Span* s : Static::sampled_objects_) {
+      // Compute fragmentation to charge to this sample:
+      StackTrace* const t = s->sampled_stack();
+      if (t->proxy == nullptr) {
+        // There is just one object per-span, and neighboring spans
+        // can be released back to the system, so we charge no
+        // fragmentation to this sampled object.
+        continue;
+      }
+
+      // Fetch the span on which the proxy lives so we can examine its
+      // co-residents.
+      const PageID p = reinterpret_cast<uintptr_t>(t->proxy) >> kPageShift;
+      Span* span = Static::pagemap()->GetDescriptor(p);
+      if (span == nullptr) {
+        // Avoid crashes in production mode code, but report in tests.
+        ASSERT(span != nullptr);
+        continue;
+      }
+
+      const double frag = span->Fragmentation();
+      if (frag > 0) {
+        profile->AddTrace(frag, *t);
+      }
+    }
+  }
+  return profile;
+}
+
+// If <unsample> is true, the caller expects a profile where sampling has been
+// compensated for (that is, it reports 8000 16-byte objects iff we believe the
+// program has that many live objects.) Otherwise, do not adjust for sampling
+// (the caller will do so somehow.)
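// (Illustrative intuition, not part of this diff and not a statement about
// StackTraceTable's exact weighting: with a sampling period of roughly 2 MiB
// of allocation per sample, one sampled 16-byte object stands in for on the
// order of 2 MiB / 16 B == 131072 live objects of that size, and "unsampling"
// scales each trace's contribution accordingly.)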
+static std::unique_ptr +DumpHeapProfile(bool unsample) { + auto profile = absl::make_unique( + tcmalloc::ProfileType::kHeap, Sampler::GetSamplePeriod(), true, unsample); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (Span* s : Static::sampled_objects_) { + profile->AddTrace(1.0, *s->sampled_stack()); + } + return profile; +} + +class AllocationSampleList; + +class AllocationSample + : public tcmalloc::tcmalloc_internal::AllocationProfilingTokenBase { + public: + AllocationSample(); + ~AllocationSample() override; + + tcmalloc::Profile Stop() && override; + + std::unique_ptr + StopInternal() &&; + + private: + std::unique_ptr mallocs_; + AllocationSample* next GUARDED_BY(pageheap_lock); + friend class AllocationSampleList; +}; + +class AllocationSampleList { + public: + void Add(AllocationSample* as) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + as->next = first_; + first_ = as; + } + + // This list is very short and we're nowhere near a hot path, just walk + void Remove(AllocationSample* as) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + AllocationSample** link = &first_; + AllocationSample* cur = first_; + while (cur != as) { + CHECK_CONDITION(cur != nullptr); + link = &cur->next; + cur = cur->next; + } + *link = as->next; + } + + void ReportMalloc(const struct StackTrace& sample) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + AllocationSample* cur = first_; + while (cur != nullptr) { + cur->mallocs_->AddTrace(1.0, sample); + cur = cur->next; + } + } + + private: + AllocationSample* first_; +} allocation_samples_ GUARDED_BY(pageheap_lock); + +AllocationSample::AllocationSample() { + mallocs_ = absl::make_unique( + tcmalloc::ProfileType::kAllocations, Sampler::GetSamplePeriod(), true, + true); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocation_samples_.Add(this); +} + +AllocationSample::~AllocationSample() { + if (mallocs_ == nullptr) { + return; + } + + // deleted before ending profile, do it for them + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocation_samples_.Remove(this); + } +} + +std::unique_ptr + AllocationSample::StopInternal() && LOCKS_EXCLUDED(pageheap_lock) { + // We need to remove ourselves from the allocation_samples_ list before we + // mutate mallocs_; + if (mallocs_) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocation_samples_.Remove(this); + } + return std::move(mallocs_); +} + +tcmalloc::Profile AllocationSample::Stop() && LOCKS_EXCLUDED(pageheap_lock) { + // We need to remove ourselves from the allocation_samples_ list before we + // mutate mallocs_; + if (mallocs_) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocation_samples_.Remove(this); + } + return tcmalloc::tcmalloc_internal::ProfileAccessor::MakeProfile( + std::move(mallocs_)); +} + +extern "C" void MallocExtension_Internal_GetStats(std::string* ret) { + for (size_t shift = 17; shift < 22; shift++) { + const size_t size = 1 << shift; + // Double ret's size until we succeed in writing the buffer without + // truncation. + // + // TODO(b/142931922): printer only writes data and does not read it. + // Leverage https://wg21.link/P1072 when it is standardized. + ret->resize(size - 1); + + size_t written_size = TCMalloc_Internal_GetStats(&*ret->begin(), size - 1); + if (written_size < size - 1) { + // We did not truncate. 
+ ret->resize(written_size); + break; + } + } +} + +extern "C" size_t TCMalloc_Internal_GetStats(char* buffer, + size_t buffer_length) { + TCMalloc_Printer printer(buffer, buffer_length); + if (buffer_length < 10000) { + DumpStats(&printer, 1); + } else { + DumpStats(&printer, 2); + } + + printer.printf("\nLow-level allocator stats:\n"); + printer.printf("Memory Release Failures: %d\n", + tcmalloc::SystemReleaseErrors()); + + size_t n = printer.SpaceRequired(); + // SpaceRequired includes the null terminator. Remove it. + if (n > 0) { + n--; + } + + size_t bytes_remaining = buffer_length > n ? buffer_length - n : 0; + if (bytes_remaining > 0) { + n += tcmalloc::GetRegionFactory()->GetStats( + absl::Span(buffer + n, bytes_remaining)); + } + + return n; +} + +extern "C" const tcmalloc::tcmalloc_internal::ProfileBase* +TCMalloc_Internal_SnapshotCurrent(tcmalloc::ProfileType type) { + switch (type) { + case tcmalloc::ProfileType::kHeap: + return DumpHeapProfile(true).release(); + case tcmalloc::ProfileType::kFragmentation: + return DumpFragmentationProfile().release(); + case tcmalloc::ProfileType::kPeakHeap: + return Static::peak_heap_tracker()->DumpSample().release(); + default: + return nullptr; + } +} + +extern "C" tcmalloc::tcmalloc_internal::AllocationProfilingTokenBase* +TCMalloc_Internal_StartAllocationProfiling() { + return new AllocationSample(); +} + +namespace tcmalloc { + +bool GetNumericProperty(const char* name_data, size_t name_size, + size_t* value) { + ASSERT(name_data != nullptr); + ASSERT(value != nullptr); + const absl::string_view name(name_data, name_size); + + // This is near the top since ReleasePerCpuMemoryToOS() calls it frequently. + if (name == "tcmalloc.per_cpu_caches_active") { + *value = Static::CPUCacheActive(); + return true; + } + + if (name == "generic.virtual_memory_used") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = VirtualMemoryUsed(stats); + return true; + } + + if (name == "generic.physical_memory_used") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = PhysicalMemoryUsed(stats); + return true; + } + + if (name == "generic.current_allocated_bytes" || + name == "generic.bytes_in_use_by_app") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = InUseByApp(stats); + return true; + } + + if (name == "generic.heap_size") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + BackingStats stats = Static::page_allocator()->stats(); + *value = stats.system_bytes - stats.unmapped_bytes; + return true; + } + + if (name == "tcmalloc.central_cache_free") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = stats.central_bytes; + return true; + } + + if (name == "tcmalloc.cpu_free") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = stats.per_cpu_bytes; + return true; + } + + if (name == "tcmalloc.slack_bytes") { + // Kept for backwards compatibility. Now defined externally as: + // pageheap_free_bytes + pageheap_unmapped_bytes. 
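  // (Usage sketch, not part of this diff, assuming the public
  // MallocExtension::GetNumericProperty() API from tcmalloc/malloc_extension.h:
  //
  //   absl::optional<size_t> heap =
  //       tcmalloc::MallocExtension::GetNumericProperty("generic.heap_size");
  //   absl::optional<size_t> slack =
  //       tcmalloc::MallocExtension::GetNumericProperty("tcmalloc.slack_bytes");
  //
  // A property name this function does not recognize makes it return false,
  // which the caller surfaces as an empty optional.)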
+ absl::base_internal::SpinLockHolder l(&pageheap_lock); + BackingStats stats = Static::page_allocator()->stats(); + *value = stats.free_bytes + stats.unmapped_bytes; + return true; + } + + if (name == "tcmalloc.pageheap_free_bytes" || + name == "tcmalloc.page_heap_free") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = Static::page_allocator()->stats().free_bytes; + return true; + } + + if (name == "tcmalloc.pageheap_unmapped_bytes" || + name == "tcmalloc.page_heap_unmapped") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = Static::page_allocator()->stats().unmapped_bytes; + return true; + } + + if (name == "tcmalloc.page_algorithm") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = Static::page_allocator()->algorithm(); + return true; + } + + if (name == "tcmalloc.max_total_thread_cache_bytes") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = ThreadCache::overall_thread_cache_size(); + return true; + } + + if (name == "tcmalloc.current_total_thread_cache_bytes" || + name == "tcmalloc.thread_cache_free") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = stats.thread_bytes; + return true; + } + + if (name == "tcmalloc.thread_cache_count") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = stats.tc_stats.in_use; + return true; + } + + if (name == "tcmalloc.local_bytes") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = stats.thread_bytes + stats.per_cpu_bytes; + return true; + } + + if (name == "tcmalloc.external_fragmentation_bytes") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = + (stats.pageheap.free_bytes + stats.central_bytes + stats.per_cpu_bytes + + stats.transfer_bytes + stats.thread_bytes + stats.metadata_bytes); + return true; + } + + if (name == "tcmalloc.metadata_bytes") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, true); + *value = stats.metadata_bytes; + return true; + } + + if (name == "tcmalloc.transfer_cache_free") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = stats.transfer_bytes; + return true; + } + + bool want_hard_limit = (name == "tcmalloc.hard_usage_limit_bytes"); + if (want_hard_limit || name == "tcmalloc.desired_usage_limit_bytes") { + size_t amount; + bool is_hard; + std::tie(amount, is_hard) = Static::page_allocator()->limit(); + if (want_hard_limit != is_hard) { + amount = std::numeric_limits::max(); + } + *value = amount; + return true; + } + + if (name == "tcmalloc.required_bytes") { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + *value = RequiredBytes(stats); + return true; + } + + const absl::string_view kExperimentPrefix = "tcmalloc.experiment."; + if (absl::StartsWith(name, kExperimentPrefix)) { + absl::optional exp = + FindExperimentByName(absl::StripPrefix(name, kExperimentPrefix)); + if (exp.has_value()) { + *value = IsExperimentActive(*exp) ? 1 : 0; + return true; + } + } + + return false; +} + +tcmalloc::MallocExtension::Ownership GetOwnership(const void* ptr) { + const PageID p = reinterpret_cast(ptr) >> kPageShift; + return Static::pagemap()->GetDescriptor(p) + ? 
tcmalloc::MallocExtension::Ownership::kOwned + : tcmalloc::MallocExtension::Ownership::kNotOwned; +} + +extern "C" bool MallocExtension_Internal_GetNumericProperty( + const char* name_data, size_t name_size, size_t* value) { + return GetNumericProperty(name_data, name_size, value); +} + +// In free fast-path we handle delete hooks by delegating work to slower +// function that both performs delete hooks calls and does free. This is done so +// that free fast-path only does tail calls, which allow compiler to avoid +// generating costly prologue/epilogue for fast-path. +void free_fast_path_disabled(void* ptr); + +} // namespace tcmalloc + +extern "C" void MallocExtension_Internal_GetMemoryLimit( + tcmalloc::MallocExtension::MemoryLimit* limit) { + ASSERT(limit != nullptr); + + std::tie(limit->limit, limit->hard) = Static::page_allocator()->limit(); +} + +extern "C" void MallocExtension_Internal_SetMemoryLimit( + const tcmalloc::MallocExtension::MemoryLimit* limit) { + ASSERT(limit != nullptr); + + if (!limit->hard) { + tcmalloc::Parameters::set_heap_size_hard_limit(0); + tcmalloc::Static::page_allocator()->set_limit(limit->limit, + false /* !hard */); + } else { + tcmalloc::Parameters::set_heap_size_hard_limit(limit->limit); + } +} + +extern "C" void MallocExtension_MarkThreadIdle() { ThreadCache::BecomeIdle(); } + +extern "C" tcmalloc::AddressRegionFactory* +TCMalloc_Internal_GetRegionFactory() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return tcmalloc::GetRegionFactory(); +} + +extern "C" void TCMalloc_Internal_SetRegionFactory( + tcmalloc::AddressRegionFactory* factory) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + tcmalloc::SetRegionFactory(factory); +} + +extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( + size_t num_bytes) { + // We're dropping page heap lock while actually calling to kernel to release + // pages. To avoid confusing ourselves with extra_bytes_released handling, + // lets do separate lock just for release. + static absl::base_internal::SpinLock release_lock( + absl::base_internal::kLinkerInitialized); + // ReleaseMemoryToSystem() might release more than the requested bytes because + // the page heap releases at the span granularity, and spans are of wildly + // different sizes. This keeps track of the extra bytes bytes released so + // that the app can periodically call ReleaseMemoryToSystem() to release + // memory at a constant rate. + ABSL_CONST_INIT static size_t extra_bytes_released; + + absl::base_internal::SpinLockHolder rh(&release_lock); + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + if (num_bytes <= extra_bytes_released) { + // We released too much on a prior call, so don't release any + // more this time. + extra_bytes_released = extra_bytes_released - num_bytes; + num_bytes = 0; + } else { + num_bytes = num_bytes - extra_bytes_released; + } + + Length num_pages; + if (num_bytes > 0) { + // A sub-page size request may round down to zero. Assume the caller wants + // some memory released. + num_pages = std::max(num_bytes >> kPageShift, 1); + } else { + num_pages = 0; + } + size_t bytes_released = + Static::page_allocator()->ReleaseAtLeastNPages(num_pages) << kPageShift; + if (bytes_released > num_bytes) { + extra_bytes_released = bytes_released - num_bytes; + } else { + // The PageHeap wasn't able to release num_bytes. Don't try to compensate + // with a big release next time. + extra_bytes_released = 0; + } +} + +// nallocx slow path. 
+// Moved to a separate function because size_class_with_alignment is not inlined +// which would cause nallocx to become non-leaf function with stack frame and +// stack spills. ABSL_ATTRIBUTE_ALWAYS_INLINE does not work on +// size_class_with_alignment, compiler barks that it can't inline the function +// somewhere. +static ABSL_ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) { + Static::InitIfNecessary(); + size_t align = static_cast(1ull << (flags & 0x3f)); + uint32_t cl; + if (ABSL_PREDICT_TRUE(Static::sizemap()->GetSizeClass(size, align, &cl))) { + ASSERT(cl != 0); + return Static::sizemap()->class_to_size(cl); + } else { + return tcmalloc::pages(size) << kPageShift; + } +} + +// The nallocx function allocates no memory, but it performs the same size +// computation as the malloc function, and returns the real size of the +// allocation that would result from the equivalent malloc function call. +// nallocx is a malloc extension originally implemented by jemalloc: +// http://www.unix.com/man-page/freebsd/3/nallocx/ +extern "C" size_t nallocx(size_t size, int flags) noexcept { + if (ABSL_PREDICT_FALSE(!Static::IsInited() || flags != 0)) { + return nallocx_slow(size, flags); + } + uint32_t cl; + if (ABSL_PREDICT_TRUE(Static::sizemap()->GetSizeClass(size, &cl))) { + ASSERT(cl != 0); + return Static::sizemap()->class_to_size(cl); + } else { + return tcmalloc::pages(size) << kPageShift; + } +} + +extern "C" tcmalloc::MallocExtension::Ownership +MallocExtension_Internal_GetOwnership(const void* ptr) { + return tcmalloc::GetOwnership(ptr); +} + +extern "C" void MallocExtension_Internal_GetProperties( + std::map* result) { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, true); + + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + + result->clear(); + // Virtual Memory Used + (*result)["generic.virtual_memory_used"].value = virtual_memory_used; + // Physical Memory used + (*result)["generic.physical_memory_used"].value = physical_memory_used; + // Bytes in use By App + (*result)["generic.bytes_in_use_by_app"].value = bytes_in_use_by_app; + // Page Heap Free + (*result)["tcmalloc.page_heap_free"].value = stats.pageheap.free_bytes; + // Metadata Bytes + (*result)["tcmalloc.metadata_bytes"].value = stats.metadata_bytes; + // Heaps in Use + (*result)["tcmalloc.thread_cache_count"].value = stats.tc_stats.in_use; + // Central Cache Free List + (*result)["tcmalloc.central_cache_free"].value = stats.central_bytes; + // Transfer Cache Free List + (*result)["tcmalloc.transfer_cache_free"].value = stats.transfer_bytes; + // Per CPU Cache Free List + (*result)["tcmalloc.cpu_free"].value = stats.per_cpu_bytes; + (*result)["tcmalloc.per_cpu_caches_active"].value = Static::CPUCacheActive(); + // Thread Cache Free List + (*result)["tcmalloc.thread_cache_free"].value = stats.thread_bytes; + // Page Unmapped + (*result)["tcmalloc.pageheap_unmapped_bytes"].value = + stats.pageheap.unmapped_bytes; + (*result)["tcmalloc.page_heap_unmapped"].value = + stats.pageheap.unmapped_bytes; + + (*result)["tcmalloc.page_algorithm"].value = + Static::page_allocator()->algorithm(); + + tcmalloc::FillExperimentProperties(result); + tcmalloc::tracking::GetProperties(result); +} + +extern "C" size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu) { + size_t bytes = 0; + if (Static::CPUCacheActive()) { + bytes = Static::cpu_cache()->Reclaim(cpu); + } 
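// Usage sketch for the flag encoding decoded by nallocx()/sdallocx() above:
// the low 6 bits of `flags` hold log2(alignment), matching the
// `1ull << (flags & 0x3f)` decoding. The extern "C" declarations are repeated
// here only to keep the sketch self-contained.
#include <cstdlib>
#include <cstring>

extern "C" size_t nallocx(size_t size, int flags);
extern "C" void sdallocx(void* ptr, size_t size, int flags);

// Encodes an alignment as log2(align) in the low bits of the flags word.
inline int FlagsForAlignment(size_t align) { return __builtin_ctzll(align); }

void SizeClassAwareBuffer() {
  constexpr size_t kRequest = 100;
  // Ask how much malloc(100) would really reserve, without allocating.
  const size_t usable = nallocx(kRequest, 0);
  // The same question for a 64-byte-aligned allocation.
  const size_t usable_aligned = nallocx(kRequest, FlagsForAlignment(64));
  (void)usable_aligned;

  char* buf = static_cast<char*>(malloc(kRequest));
  if (buf != nullptr) {
    memset(buf, 0, usable);      // the whole size class is usable
    sdallocx(buf, kRequest, 0);  // sized free with the originally requested size
  }
}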
+ return bytes; +} + +//------------------------------------------------------------------- +// Helpers for the exported routines below +//------------------------------------------------------------------- + +#ifdef ABSL_HAVE_TLS +// See the comment on ThreadCache::thread_local_data_ regarding +// ABSL_ATTRIBUTE_INITIAL_EXEC. +__thread Sampler thread_sampler_ ABSL_ATTRIBUTE_INITIAL_EXEC; + +inline Sampler* GetThreadSampler() { return &thread_sampler_; } + +#else + +inline Sampler* GetThreadSampler() { + ThreadCache* heap = ThreadCache::GetCache(); + return heap->GetSampler(); +} + +#endif + +static void FreeSmallSlow(void* ptr, size_t cl); + +namespace { + +// Sets `*psize` to `size`, +inline void SetCapacity(size_t size, std::nullptr_t) {} +inline void SetCapacity(size_t size, size_t* psize) { *psize = size; } + +// Sets `*psize` to the size for the size class in `cl`, +inline void SetClassCapacity(size_t size, std::nullptr_t) {} +inline void SetClassCapacity(uint32_t cl, size_t* psize) { + *psize = Static::sizemap()->class_to_size(cl); +} + +// Sets `*psize` to the size for the size class in `cl` if `ptr` is not null, +// else `*psize` is set to 0. This method is overloaded for `nullptr_t` below, +// allowing the compiler to optimize code between regular and size returning +// allocation operations. +inline void SetClassCapacity(const void*, uint32_t, std::nullptr_t) {} +inline void SetClassCapacity(const void* ptr, uint32_t cl, size_t* psize) { + if (ABSL_PREDICT_TRUE(ptr != nullptr)) { + *psize = Static::sizemap()->class_to_size(cl); + } else { + *psize = 0; + } +} + +// Sets `*psize` to the size in pages corresponding to the requested size in +// `size` if `ptr` is not null, else `*psize` is set to 0. This method is +// overloaded for `nullptr_t` below, allowing the compiler to optimize code +// between regular and size returning allocation operations. +inline void SetPagesCapacity(const void*, size_t, std::nullptr_t) {} +inline void SetPagesCapacity(const void* ptr, size_t size, size_t* psize) { + if (ABSL_PREDICT_TRUE(ptr != nullptr)) { + *psize = tcmalloc::pages(size) << kPageShift; + } else { + *psize = 0; + } +} + +} // namespace + +// This is used to reflect our knowledge of whether fast path readiness is +// checked and which value it was. +enum class FreeFastPath { + ENABLED, // This value is used when we checked fast-path readiness early and + // found it enabled. + DISABLED, // This value is used when we explicitly want to save some code + // size and don't need inlining free fast path. + UNKNOWN // This value is used when we have not checked fast-path yet or + // when we possibly changed it just now, and still want freeing + // code be reasonably fast in common case. +}; + +// Helper for do_free_with_cl +template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreeSmall(void* ptr, size_t cl) { + if (fast_path == FreeFastPath::DISABLED) { + // Take the slow path. + return FreeSmallSlow(ptr, cl); + } else if (fast_path != FreeFastPath::ENABLED && + ABSL_PREDICT_FALSE(!GetThreadSampler()->IsOnFastPath())) { + // Take the slow path. + return FreeSmallSlow(ptr, cl); + } + +#ifndef TCMALLOC_DEPRECATED_PERTHREAD + // The CPU Cache is enabled, so we're able to take the fastpath. 
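// The FreeFastPath template parameter above turns the readiness check into a
// compile-time decision. A stripped-down sketch of the same pattern, using
// stand-in names rather than tcmalloc internals:
#include <cstdio>

enum class Mode { kEnabled, kDisabled, kUnknown };

inline bool RuntimeCheck() { return true; }  // stand-in for IsOnFastPath()
void SlowPath(int x) { std::printf("slow %d\n", x); }

template <Mode mode>
inline void Release(int x) {
  if (mode == Mode::kDisabled) {
    return SlowPath(x);  // always slow; no runtime check is emitted
  } else if (mode != Mode::kEnabled && !RuntimeCheck()) {
    return SlowPath(x);  // only kUnknown pays for the runtime check
  }
  std::printf("fast %d\n", x);  // fast path
}

void Demo() {
  Release<Mode::kEnabled>(1);  // branch folded away: straight to the fast path
  Release<Mode::kUnknown>(2);  // keeps the RuntimeCheck() call
}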
+ ASSERT(Static::CPUCacheActive()); + ASSERT(tcmalloc::subtle::percpu::IsFastNoInit()); + + Static::cpu_cache()->Deallocate(ptr, cl); +#else // TCMALLOC_DEPRECATED_PERTHREAD + ThreadCache* cache = ThreadCache::GetCacheIfPresent(); + + // IsOnFastPath does not track whether or not we have an active ThreadCache on + // this thread, so we need to check cache for nullptr. + if (ABSL_PREDICT_FALSE(cache == nullptr)) { + FreeSmallSlow(ptr, cl); + return; + } + + cache->Deallocate(ptr, cl); +#endif // TCMALLOC_DEPRECATED_PERTHREAD +} + +// this helper function is used when FreeSmall (defined above) hits +// the case of thread state not being in per-cpu mode or hitting case +// of no thread cache. This happens when thread state is not yet +// properly initialized with real thread cache or with per-cpu mode, +// or when thread state is already destroyed as part of thread +// termination. +// +// We explicitly prevent inlining it to keep it out of fast-path, so +// that fast-path only has tail-call, so that fast-path doesn't need +// function prologue/epilogue. +ABSL_ATTRIBUTE_NOINLINE +static void FreeSmallSlow(void* ptr, size_t cl) { + // Refresh the fast path state. + GetThreadSampler()->UpdateFastPathState(); + if (ABSL_PREDICT_TRUE(tcmalloc::UsePerCpuCache())) { + Static::cpu_cache()->Deallocate(ptr, cl); + } else if (ThreadCache* cache = ThreadCache::GetCacheIfPresent()) { + // TODO(b/134691947): If we reach this path from the ThreadCache fastpath, + // we've already checked that UsePerCpuCache is false and cache == nullptr. + // Consider optimizing this. + cache->Deallocate(ptr, cl); + } else { + // This thread doesn't have thread-cache yet or already. Delete directly + // into central cache. + Static::transfer_cache()[cl].InsertRange(absl::Span(&ptr, 1), 1); + } +} + +namespace { + +// If this allocation can be guarded, and if it's time to do a guarded sample, +// returns a guarded allocation Span. Otherwise returns nullptr. +static void* TrySampleGuardedAllocation(size_t size, size_t alignment, + Length num_pages) { + if (num_pages == 1 && GetThreadSampler()->ShouldSampleGuardedAllocation()) { + // The num_pages == 1 constraint ensures that size <= kPageSize. And since + // alignments above kPageSize cause cl == 0, we're also guaranteed + // alignment <= kPageSize + // + // In all cases kPageSize <= GPA::page_size_, so Allocate's preconditions + // are met. + return Static::guardedpage_allocator()->Allocate(size, alignment); + } + return nullptr; +} + +// Performs sampling for already occurred allocation of object. +// +// For very small object sizes, object is used as 'proxy' and full +// page with sampled marked is allocated instead. +// +// For medium-sized objects that have single instance per span, +// they're simply freed and fresh page span is allocated to represent +// sampling. +// +// For large objects (i.e. allocated with do_malloc_pages) they are +// also fully reused and their span is marked as sampled. +// +// Note that do_free_with_size assumes sampled objects have +// page-aligned addresses. Please change both functions if need to +// invalidate the assumption. +// +// Note that cl might not match requested_size in case of +// memalign. I.e. when larger than requested allocation is done to +// satisfy alignment constraint. +// +// In case of out-of-memory condition when allocating span or +// stacktrace struct, this function simply cheats and returns original +// object. As if no sampling was requested. 
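// The samples produced by the machinery below are consumed through the public
// profiling API. A small sketch, assuming the Profile/Sample types declared in
// tcmalloc/malloc_extension.h (ProfileType::kHeap corresponds to the
// DumpHeapProfile() dispatch earlier in this file):
#include <cstdint>
#include <cstdio>

#include "tcmalloc/malloc_extension.h"

void PrintHeapProfileSummary() {
  const tcmalloc::Profile profile =
      tcmalloc::MallocExtension::SnapshotCurrent(tcmalloc::ProfileType::kHeap);
  int64_t total_bytes = 0;
  profile.Iterate([&](const tcmalloc::Profile::Sample& sample) {
    total_bytes += sample.sum;  // each sample's contribution, unskewed by weight
  });
  std::printf("sampled live heap ~= %lld bytes\n",
              static_cast<long long>(total_bytes));
}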
+static void* SampleifyAllocation(size_t requested_size, size_t weight, + size_t requested_alignment, size_t cl, + void* obj, Span* span, size_t* capacity) { + CHECK_CONDITION((cl != 0 && obj != nullptr && span == nullptr) || + (cl == 0 && obj == nullptr && span != nullptr)); + + void* proxy = nullptr; + void* guarded_alloc = nullptr; + size_t allocated_size; + bool success = false; + + // requested_alignment = 1 means 'small size table alignment was used' + // Historically this is reported as requested_alignment = 0 + if (requested_alignment == 1) { + requested_alignment = 0; + } + + if (cl != 0) { + ASSERT(cl == Static::pagemap()->sizeclass( + reinterpret_cast(obj) >> kPageShift)); + + allocated_size = Static::sizemap()->class_to_size(cl); + + // If the caller didn't provide a span, allocate one: + Length num_pages = tcmalloc::pages(allocated_size); + if ((guarded_alloc = TrySampleGuardedAllocation( + requested_size, requested_alignment, num_pages))) { + ASSERT(tcmalloc::IsTaggedMemory(guarded_alloc)); + const PageID p = reinterpret_cast(guarded_alloc) >> kPageShift; + absl::base_internal::SpinLockHolder h(&pageheap_lock); + span = Span::New(p, num_pages); + Static::pagemap()->Set(p, span); + // If we report capacity back from a size returning allocation, we can not + // report the allocated_size, as we guard the size to 'requested_size', + // and we maintain the invariant that GetAllocatedSize() must match the + // returned size from size returning allocations. So in that case, we + // report the requested size for both capacity and GetAllocatedSize(). + if (capacity) allocated_size = requested_size; + } else if ((span = Static::page_allocator()->New( + num_pages, /*tagged=*/true)) == nullptr) { + if (capacity) *capacity = allocated_size; + return obj; + } + + size_t span_size = Static::sizemap()->class_to_pages(cl) << kPageShift; + size_t objects_per_span = span_size / allocated_size; + + if (objects_per_span != 1) { + ASSERT(objects_per_span > 1); + proxy = obj; + obj = nullptr; + } + } else { + // Set allocated_size to the exact size for a page allocation. + // NOTE: if we introduce gwp-asan sampling / guarded allocations + // for page allocations, then we need to revisit do_malloc_pages as + // the current assumption is that only class sized allocs are sampled + // for gwp-asan. + allocated_size = span->bytes_in_span(); + } + if (capacity) *capacity = allocated_size; + + ASSERT(span != nullptr); + + // Grab the stack trace outside the heap lock + StackTrace tmp; + tmp.proxy = proxy; + tmp.depth = absl::GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1); + tmp.requested_size = requested_size; + tmp.requested_alignment = requested_alignment; + tmp.allocated_size = allocated_size; + tmp.weight = weight; + + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + // Allocate stack trace + StackTrace *stack = Static::stacktrace_allocator()->New(); + if (stack != nullptr) { + allocation_samples_.ReportMalloc(tmp); + *stack = tmp; + span->Sample(stack); + // lets flag success and release the pageheap_lock + success = true; + } + } + + if (success) { + Static::peak_heap_tracker()->MaybeSaveSample(); + } + + if (!success) { + // We couldn't allocate a stack trace. We have a perfectly good + // span. Use it (getting rid of any proxy/small object.) + if (proxy != nullptr) obj = proxy; + } + + if (obj != nullptr) { + // We delete directly into central cache to avoid tracking this as + // purely internal deletion. 
We've already (correctly) tracked + // this allocation as either malloc hit or malloc miss, and we + // must not count anything else for this allocation. + Static::transfer_cache()[cl].InsertRange(absl::Span(&obj, 1), 1); + } + return guarded_alloc ? guarded_alloc : span->start_address(); +} + +// ShouldSampleAllocation() is called when an allocation of the given requested +// size is in progress. It returns the sampling weight of the allocation if it +// should be "sampled," and 0 otherwise. See SampleifyAllocation(). +// +// Sampling is done based on requested sizes and later unskewed during profile +// generation. +inline size_t ShouldSampleAllocation(size_t size) { + return GetThreadSampler()->RecordAllocation(size); +} + +inline void* do_malloc_pages(size_t size, size_t alignment) { + // Page allocator does not deal well with num_pages = 0. + Length num_pages = std::max(tcmalloc::pages(size), 1); + + Span* span = Static::page_allocator()->NewAligned( + num_pages, tcmalloc::pages(alignment), /*tagged=*/false); + + if (span == nullptr) { + return nullptr; + } + + void* result = span->start_address(); + + if (size_t weight = ShouldSampleAllocation(size)) { + CHECK_CONDITION(result == SampleifyAllocation(size, weight, alignment, 0, + nullptr, span, nullptr)); + } + + return result; +} + +template +inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE AllocSmall(Policy policy, size_t cl, + size_t size, + CapacityPtr capacity) { + ASSERT(cl != 0); + void* result; + + if (tcmalloc::UsePerCpuCache()) { + result = Static::cpu_cache()->Allocate(cl); + } else { + result = ThreadCache::GetCache()->Allocate(cl); + } + + if (!Policy::can_return_nullptr()) { + ASSUME(result != nullptr); + } + + if (ABSL_PREDICT_FALSE(result == nullptr)) { + SetCapacity(0, capacity); + return nullptr; + } + size_t weight; + if (ABSL_PREDICT_FALSE(weight = ShouldSampleAllocation(size))) { + return SampleifyAllocation(size, weight, policy.align(), cl, result, + nullptr, capacity); + } + SetClassCapacity(cl, capacity); + return result; +} + +// Handles freeing object that doesn't have size class, i.e. which +// is either large or sampled. We explicitly prevent inlining it to +// keep it out of fast-path. This helps avoid expensive +// prologue/epiloge for fast-path freeing functions. +ABSL_ATTRIBUTE_NOINLINE +static void do_free_pages(void* ptr, const PageID p) { + GetThreadSampler()->UpdateFastPathState(); + + void* proxy = nullptr; + size_t size; + bool notify_sampled_alloc = false; + + Span* span = Static::pagemap()->GetExistingDescriptor(p); + ASSERT(span != nullptr); + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + ASSERT(span->first_page() == p); + if (StackTrace* st = span->Unsample()) { + proxy = st->proxy; + size = st->allocated_size; + if (proxy == nullptr && size <= kMaxSize) { + tcmalloc::tracking::Report(tcmalloc::kFreeMiss, + Static::sizemap()->SizeClass(size), 1); + } + notify_sampled_alloc = true; + Static::stacktrace_allocator()->Delete(st); + } + if (tcmalloc::IsTaggedMemory(ptr)) { + if (Static::guardedpage_allocator()->PointerIsMine(ptr)) { + // Release lock while calling Deallocate() since it does a system call. 
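// Large requests handled by do_malloc_pages() above are rounded up to whole
// tcmalloc pages via pages(size) << kPageShift. A quick illustration of that
// arithmetic, assuming an 8 KiB page for concreteness (the real kPageShift
// depends on the build configuration):
#include <cstddef>

constexpr size_t kExamplePageShift = 13;  // assumed 8 KiB pages
constexpr size_t kExamplePageSize = size_t{1} << kExamplePageShift;

// Ceiling division from bytes to pages, mirroring tcmalloc::pages().
constexpr size_t ExamplePages(size_t bytes) {
  return (bytes + kExamplePageSize - 1) >> kExamplePageShift;
}

static_assert(ExamplePages(256 * 1024) << kExamplePageShift == 256 * 1024,
              "exact page multiples stay exact");
static_assert(ExamplePages(256 * 1024 + 1) << kExamplePageShift ==
                  256 * 1024 + kExamplePageSize,
              "anything else is rounded up to the next page");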
+ pageheap_lock.Unlock(); + Static::guardedpage_allocator()->Deallocate(ptr); + pageheap_lock.Lock(); + Span::Delete(span); + } else { + ASSERT(reinterpret_cast(ptr) % kPageSize == 0); + Static::page_allocator()->Delete(span, /*tagged=*/true); + } + } else { + ASSERT(reinterpret_cast(ptr) % kPageSize == 0); + Static::page_allocator()->Delete(span, /*tagged=*/false); + } + } + + if (notify_sampled_alloc) { + } + + if (proxy) { + FreeSmall(proxy, + Static::sizemap()->SizeClass(size)); + } +} + +#ifndef NDEBUG +static size_t GetSizeClass(void* ptr) { + const PageID p = reinterpret_cast(ptr) >> kPageShift; + return Static::pagemap()->sizeclass(p); +} +#endif + +// Helper for the object deletion (free, delete, etc.). Inputs: +// ptr is object to be freed +// cl is the size class of that object, or 0 if it's unknown +// have_cl is true iff cl is known and is non-0. +// +// Note that since have_cl is compile-time constant, genius compiler +// would not need it. Since it would be able to somehow infer that +// GetSizeClass never produces 0 cl, and so it +// would know that places that call this function with explicit 0 is +// "have_cl-case" and others are "!have_cl-case". But we certainly +// don't have such compiler. See also do_free_with_size below. +template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_cl(void* ptr, size_t cl) { + // !have_cl -> cl == 0 + ASSERT(have_cl || cl == 0); + + const PageID p = reinterpret_cast(ptr) >> kPageShift; + + // if we have_cl, then we've excluded ptr == nullptr case. See + // comment in do_free_with_size. Thus we only bother testing nullptr + // in non-sized case. + // + // Thus: ptr == nullptr -> !have_cl + ASSERT(ptr != nullptr || !have_cl); + if (!have_cl && ABSL_PREDICT_FALSE(ptr == nullptr)) { + return; + } + + // ptr must be a result of a previous malloc/memalign/... call, and + // therefore static initialization must have already occurred. + ASSERT(Static::IsInited()); + + if (!have_cl) { + cl = Static::pagemap()->sizeclass(p); + } + if (have_cl || ABSL_PREDICT_TRUE(cl != 0)) { + ASSERT(cl == GetSizeClass(ptr)); + ASSERT(ptr != nullptr); + ASSERT(!Static::pagemap()->GetExistingDescriptor(p)->sampled()); + FreeSmall(ptr, cl); + } else { + do_free_pages(ptr, p); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free(void* ptr) { + if (ABSL_PREDICT_FALSE(!GetThreadSampler()->IsOnFastPath())) { + // Take the slow path. + return tcmalloc::free_fast_path_disabled(ptr); + } + return do_free_with_cl(ptr, 0); +} + +void do_free_no_hooks(void* ptr) { + return do_free_with_cl(ptr, 0); +} + +template +bool CorrectSize(void* ptr, size_t size, AlignPolicy align); + +bool CorrectAlignment(void* ptr, std::align_val_t alignment); + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreePages(void* ptr) { + const PageID p = reinterpret_cast(ptr) >> kPageShift; + do_free_pages(ptr, p); +} + +template +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_size(void* ptr, + size_t size, + AlignPolicy align) { + ASSERT(CorrectSize(ptr, size, align)); + ASSERT(CorrectAlignment(ptr, static_cast(align.align()))); + + if (ABSL_PREDICT_FALSE(!GetThreadSampler()->IsOnFastPath())) { + // Take the slow path. + return tcmalloc::free_fast_path_disabled(ptr); + } + + // This is an optimized path that may be taken if the binary is compiled + // with -fsized-delete. We attempt to discover the size class cheaply + // without any cache misses by doing a plain computation that + // maps from size to size-class. 
+ // + // The optimized path doesn't work with sampled objects, whose deletions + // trigger more operations and require to visit metadata. + if (ABSL_PREDICT_FALSE(tcmalloc::IsTaggedMemory(ptr))) { + // we don't know true class size of the ptr + if (ptr == nullptr) return; + return FreePages(ptr); + } + + // At this point, since ptr's tag bit is 1, it means that it + // cannot be nullptr either. Thus all code below may rely on ptr != + // nullptr. And particularly, since we're only caller of + // do_free_with_cl with have_cl == true, it means have_cl implies + // ptr != nullptr. + ASSERT(ptr != nullptr); + + uint32_t cl; + if (ABSL_PREDICT_FALSE( + !Static::sizemap()->GetSizeClass(size, align.align(), &cl))) { + // We couldn't calculate the size class, which means size > kMaxSize. + ASSERT(size > kMaxSize || align.align() > alignof(std::max_align_t)); + static_assert(kMaxSize >= kPageSize, "kMaxSize must be at least kPageSize"); + return FreePages(ptr); + } + + return do_free_with_cl(ptr, cl); +} + +inline size_t GetSize(const void* ptr) { + if (ptr == nullptr) return 0; + const PageID p = reinterpret_cast(ptr) >> kPageShift; + size_t cl = Static::pagemap()->sizeclass(p); + if (cl != 0) { + return Static::sizemap()->class_to_size(cl); + } else { + const Span* span = Static::pagemap()->GetExistingDescriptor(p); + if (span->sampled()) { + if (Static::guardedpage_allocator()->PointerIsMine(ptr)) { + return Static::guardedpage_allocator()->GetRequestedSize(ptr); + } + return span->sampled_stack()->allocated_size; + } else { + return span->bytes_in_span(); + } + } +} + +// Checks that an asserted object size for is valid. +template +bool CorrectSize(void* ptr, size_t size, AlignPolicy align) { + // size == 0 means we got no hint from sized delete, so we certainly don't + // have an incorrect one. + if (size == 0) return true; + if (ptr == nullptr) return true; + uint32_t cl = 0; + // Round-up passed in size to how much tcmalloc allocates for that size. + if (Static::guardedpage_allocator()->PointerIsMine(ptr)) { + size = Static::guardedpage_allocator()->GetRequestedSize(ptr); + } else if (Static::sizemap()->GetSizeClass(size, align.align(), &cl)) { + size = Static::sizemap()->class_to_size(cl); + } else { + size = tcmalloc::pages(size) << kPageShift; + } + size_t actual = GetSize(ptr); + if (actual == size) return true; + Log(kLog, __FILE__, __LINE__, "size check failed", actual, size, cl); + return false; +} + +// Checks that an asserted object has alignment. +bool CorrectAlignment(void* ptr, std::align_val_t alignment) { + size_t align = static_cast(alignment); + ASSERT((align & (align - 1)) == 0); + return ((reinterpret_cast(ptr) & (align - 1)) == 0); +} + +// Helpers for use by exported routines below or inside debugallocation.cc: + +inline void do_malloc_stats() { + PrintStats(1); +} + +inline int do_mallopt(int cmd, int value) { + return 1; // Indicates error +} + +#ifdef HAVE_STRUCT_MALLINFO +inline struct mallinfo do_mallinfo() { + TCMallocStats stats; + ExtractStats(&stats, nullptr, nullptr, nullptr, false); + + // Just some of the fields are filled in. + struct mallinfo info; + memset(&info, 0, sizeof(info)); + + // Unfortunately, the struct contains "int" field, so some of the + // size values will be truncated. 
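// GetSize() below reports the full size-class size, so the value a caller sees
// through the extension API is usually a little larger than the request
// (malloc_usable_size semantics). A short sketch, assuming the public
// GetAllocatedSize() wrapper from tcmalloc/malloc_extension.h:
#include <cstdio>
#include <cstdlib>

#include "absl/types/optional.h"
#include "tcmalloc/malloc_extension.h"

void ShowSizeClassRounding() {
  void* p = malloc(100);
  if (p == nullptr) return;
  absl::optional<size_t> allocated =
      tcmalloc::MallocExtension::GetAllocatedSize(p);
  if (allocated.has_value()) {
    // Typically prints a nearby size class (e.g. 112 or 128), not 100.
    std::printf("requested 100, allocated %zu\n", *allocated);
  }
  free(p);
}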
+ info.arena = static_cast(stats.pageheap.system_bytes); + info.fsmblks = static_cast(stats.thread_bytes + + stats.central_bytes + + stats.transfer_bytes); + info.fordblks = static_cast(stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); + info.uordblks = static_cast(InUseByApp(stats)); + + return info; +} +#endif // HAVE_STRUCT_MALLINFO + +} // namespace + +// Slow path implementation. +// This function is used by `fast_alloc` if the allocation requires page sized +// allocations or some complex logic is required such as initialization, +// invoking new/delete hooks, sampling, etc. +// +// TODO(b/130771275): This function is marked as static, rather than appearing +// in the anonymous namespace, to workaround incomplete heapz filtering. +template +static void* ABSL_ATTRIBUTE_SECTION(google_malloc) + slow_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) { + Static::InitIfNecessary(); + GetThreadSampler()->UpdateFastPathState(); + void* p; + uint32_t cl; + bool is_small = Static::sizemap()->GetSizeClass(size, policy.align(), &cl); + if (ABSL_PREDICT_TRUE(is_small)) { + p = AllocSmall(policy, cl, size, capacity); + } else { + p = do_malloc_pages(size, policy.align()); + // Set capacity to the exact size for a page allocation. + // This needs to be revisited if we introduce gwp-asan + // sampling / guarded allocations to do_malloc_pages(). + SetPagesCapacity(p, size, capacity); + if (ABSL_PREDICT_FALSE(p == nullptr)) { + return Policy::handle_oom(size); + } + } + if (Policy::invoke_hooks()) { + } + return p; +} + +template +static inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE +fast_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) { + // If size is larger than kMaxSize, it's not fast-path anymore. In + // such case, GetSizeClass will return false, and we'll delegate to the slow + // path. If malloc is not yet initialized, we may end up with cl == 0 + // (regardless of size), but in this case should also delegate to the slow + // path by the fast path check further down. + uint32_t cl; + bool is_small = Static::sizemap()->GetSizeClass(size, policy.align(), &cl); + if (ABSL_PREDICT_FALSE(!is_small)) { + return slow_alloc(policy, size, capacity); + } + + // When using per-thread caches, we have to check for the presence of the + // cache for this thread before we try to sample, as slow_alloc will + // also try to sample the allocation. +#ifdef TCMALLOC_DEPRECATED_PERTHREAD + ThreadCache* const cache = ThreadCache::GetCacheIfPresent(); + if (ABSL_PREDICT_FALSE(cache == nullptr)) { + return slow_alloc(policy, size, capacity); + } +#endif + // TryRecordAllocationFast() returns true if no extra logic is required, e.g.: + // - this allocation does not need to be sampled + // - no new/delete hooks need to be invoked + // - no need to initialize thread globals, data or caches. + // The method updates 'bytes until next sample' thread sampler counters. + if (ABSL_PREDICT_FALSE(!GetThreadSampler()->TryRecordAllocationFast(size))) { + return slow_alloc(policy, size, capacity); + } + + // Fast path implementation for allocating small size memory. + // This code should only be reached if all of the below conditions are met: + // - the size does not exceed the maximum size (size class > 0) + // - cpu / thread cache data has been initialized. + // - the allocation is not subject to sampling / gwp-asan. + // - no new/delete hook is installed and required to be called. + ASSERT(cl != 0); + void* ret; +#ifndef TCMALLOC_DEPRECATED_PERTHREAD + // The CPU cache should be ready. 
+ ret = Static::cpu_cache()->Allocate(cl); +#else // !defined(TCMALLOC_DEPRECATED_PERTHREAD) + // The ThreadCache should be ready. + ASSERT(cache != nullptr); + ret = cache->Allocate(cl); +#endif // TCMALLOC_DEPRECATED_PERTHREAD + if (!Policy::can_return_nullptr()) { + ASSUME(ret != nullptr); + } + SetClassCapacity(ret, cl, capacity); + return ret; +} + +namespace tcmalloc { + +ABSL_ATTRIBUTE_SECTION(google_malloc) void free_fast_path_disabled(void* ptr) { + // Refresh the fast path state. + GetThreadSampler()->UpdateFastPathState(); + do_free_no_hooks(ptr); +} + +} // namespace tcmalloc + +extern "C" size_t MallocExtension_GetAllocatedSize(const void* ptr) { + ASSERT(!ptr || tcmalloc::GetOwnership(ptr) != + tcmalloc::MallocExtension::Ownership::kNotOwned); + return GetSize(ptr); +} + +extern "C" void MallocExtension_MarkThreadBusy() { + // Allocate to force the creation of a thread cache, but avoid + // invoking any hooks. + Static::InitIfNecessary(); + + if (tcmalloc::UsePerCpuCache()) { + return; + } + + do_free_no_hooks(slow_alloc(CppPolicy().Nothrow().WithoutHooks(), 0)); +} + +//------------------------------------------------------------------- +// Exported routines +//------------------------------------------------------------------- + +// depends on HAVE_STRUCT_MALLINFO, so needs to come after that. +#include "tcmalloc/libc_override.h" + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc( + size_t size) noexcept { + // Use TCMallocInternalMemalign to avoid requiring size % + // alignof(std::max_align_t) == 0. TCMallocInternalAlignedAlloc enforces this + // property. + return TCMallocInternalMemalign(alignof(std::max_align_t), size); +} + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalNew(size_t size) { + return fast_alloc(CppPolicy(), size); +} + +extern "C" ABSL_ATTRIBUTE_SECTION(google_malloc) tcmalloc::sized_ptr_t + tcmalloc_size_returning_operator_new(size_t size) { + size_t capacity; + void* p = fast_alloc(CppPolicy(), size, &capacity); + return {p, capacity}; +} + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc_aligned( + size_t size, std::align_val_t alignment) noexcept { + return fast_alloc(MallocPolicy().AlignAs(alignment), size); +} + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalNewAligned( + size_t size, std::align_val_t alignment) { + return fast_alloc(CppPolicy().AlignAs(alignment), size); +} + +extern "C" void* TCMallocInternalNewAligned_nothrow( + size_t size, std::align_val_t alignment, const std::nothrow_t& nt) noexcept +// Note: we use malloc rather than new, as we are allowed to return nullptr. +// The latter crashes in that case. 
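// Usage sketch for the size-returning operator new defined above. It assumes
// tcmalloc::sized_ptr_t is the two-field {pointer, capacity} struct from
// tcmalloc/internal/declarations.h; the member names `p` and `n` used here are
// an assumption inferred from the `return {p, capacity}` initialization above.
#include <cstddef>
#include <new>

#include "tcmalloc/internal/declarations.h"

extern "C" tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new(
    size_t size);

void CapacityAwareBuffer() {
  tcmalloc::sized_ptr_t res = tcmalloc_size_returning_operator_new(100);
  char* data = static_cast<char*>(res.p);
  const size_t capacity = res.n;  // >= 100: the full size class is reported
  // ... a container may use all of data[0 .. capacity) before regrowing ...
  // Per the size-returning contract, any size in [requested, capacity] is
  // acceptable for the sized delete.
  ::operator delete(data, capacity);
}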
+#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalMalloc_aligned); +#else +{ + return fast_alloc(CppPolicy().Nothrow().AlignAs(alignment), size); +} +#endif // TCMALLOC_ALIAS + +extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalFree( + void* ptr) noexcept { + do_free(ptr); +} + +extern "C" void TCMallocInternalSdallocx(void* ptr, size_t size, + int flags) noexcept { + size_t alignment = alignof(std::max_align_t); + + if (ABSL_PREDICT_FALSE(flags != 0)) { + ASSERT((flags & ~0x3f) == 0); + alignment = static_cast(1ull << (flags & 0x3f)); + } + + return do_free_with_size(ptr, size, tcmalloc::AlignAsPolicy(alignment)); +} + +extern "C" void* TCMallocInternalCalloc(size_t n, size_t elem_size) noexcept { + // Overflow check + const size_t size = n * elem_size; + if (elem_size != 0 && size / elem_size != n) { + return MallocPolicy::handle_oom(std::numeric_limits::max()); + } + void* result = fast_alloc(MallocPolicy(), size); + if (result != nullptr) { + memset(result, 0, size); + } + return result; +} + +// Here and below we use TCMALLOC_ALIAS (if supported) to make +// identical functions aliases. This saves space in L1 instruction +// cache. As of now it saves ~9K. +extern "C" void TCMallocInternalCfree(void* ptr) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(ptr); +} +#endif // TCMALLOC_ALIAS + +static inline void* do_realloc(void* old_ptr, size_t new_size) { + Static::InitIfNecessary(); + // Get the size of the old entry + const size_t old_size = GetSize(old_ptr); + + // Reallocate if the new size is larger than the old size, + // or if the new size is significantly smaller than the old size. + // We do hysteresis to avoid resizing ping-pongs: + // . If we need to grow, grow to max(new_size, old_size * 1.X) + // . Don't shrink unless new_size < old_size * 0.Y + // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5. + const size_t min_growth = std::min( + old_size / 4, + std::numeric_limits::max() - old_size); // Avoid overflow. + const size_t lower_bound_to_grow = old_size + min_growth; + const size_t upper_bound_to_shrink = old_size / 2; + if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) { + // Need to reallocate. + void* new_ptr = nullptr; + + if (new_size > old_size && new_size < lower_bound_to_grow) { + // Avoid fast_alloc() reporting a hook with the lower bound size + // as we the expectation for pointer returning allocation functions + // is that malloc hooks are invoked with the requested_size. + new_ptr = fast_alloc(MallocPolicy().Nothrow().WithoutHooks(), + lower_bound_to_grow); + if (new_ptr != nullptr) { + } + } + if (new_ptr == nullptr) { + // Either new_size is not a tiny increment, or last do_malloc failed. + new_ptr = fast_alloc(MallocPolicy(), new_size); + } + if (new_ptr == nullptr) { + return nullptr; + } + memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); + // We could use a variant of do_free() that leverages the fact + // that we already know the sizeclass of old_ptr. The benefit + // would be small, so don't bother. 
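// Worked example of the hysteresis in do_realloc() above, using the 1.25x grow
// and 0.5x shrink constants from its comment:
#include <cstddef>
#include <cstdio>

void ShowReallocThresholds(size_t old_size) {
  const size_t lower_bound_to_grow = old_size + old_size / 4;  // 1.25x
  const size_t upper_bound_to_shrink = old_size / 2;           // 0.5x
  std::printf(
      "old=%zu: any new_size > %zu reallocates (tiny growth is padded up to "
      "%zu); new_size < %zu reallocates smaller; anything in between keeps "
      "the existing block\n",
      old_size, old_size, lower_bound_to_grow, upper_bound_to_shrink);
}
// For old_size = 1000: grow whenever new_size > 1000 (1001..1249 is padded to
// 1250), shrink-copy only when new_size < 500, and 500..1000 returns old_ptr.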
+ do_free(old_ptr); + return new_ptr; + } else { + return old_ptr; + } +} + +extern "C" void* TCMallocInternalRealloc(void* old_ptr, + size_t new_size) noexcept { + if (old_ptr == NULL) { + return fast_alloc(MallocPolicy(), new_size); + } + if (new_size == 0) { + do_free(old_ptr); + return NULL; + } + return do_realloc(old_ptr, new_size); +} + +extern "C" void* TCMallocInternalNewNothrow(size_t size, + const std::nothrow_t&) noexcept { + return fast_alloc(CppPolicy().Nothrow(), size); +} + +extern "C" tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( + size_t size) noexcept { + size_t capacity; + void* p = fast_alloc(CppPolicy().Nothrow(), size, &capacity); + return {p, capacity}; +} + +extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDelete(void* p) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +extern "C" void TCMallocInternalDeleteAligned( + void* p, std::align_val_t alignment) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +{ + // Note: The aligned delete/delete[] implementations differ slightly from + // their respective aliased implementations to take advantage of checking the + // passed-in alignment. + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDeleteSized( + void* p, size_t size) noexcept { + ASSERT(CorrectSize(p, size, tcmalloc::DefaultAlignPolicy())); + do_free_with_size(p, size, tcmalloc::DefaultAlignPolicy()); +} + +extern "C" void TCMallocInternalDeleteSizedAligned( + void* p, size_t t, std::align_val_t alignment) noexcept { + return do_free_with_size(p, t, tcmalloc::AlignAsPolicy(alignment)); +} + +extern "C" void TCMallocInternalDeleteArraySized(void* p, size_t size) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalDeleteSized); +#else +{ + do_free_with_size(p, size, tcmalloc::DefaultAlignPolicy()); +} +#endif + +extern "C" void TCMallocInternalDeleteArraySizedAligned( + void* p, size_t t, std::align_val_t alignment) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned); +#else +{ + return TCMallocInternalDeleteSizedAligned(p, t, alignment); +} +#endif + +// Standard C++ library implementations define and use this +// (via ::operator delete(ptr, nothrow)). +// But it's really the same as normal delete, so we just do the same thing. +extern "C" void TCMallocInternalDeleteNothrow(void* p, + const std::nothrow_t&) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +extern "C" void TCMallocInternalDeleteAligned_nothrow( + void* p, std::align_val_t alignment, const std::nothrow_t& nt) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +{ + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" void* TCMallocInternalNewArray(size_t size) +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalNew); +#else +{ + void* p = fast_alloc(CppPolicy().WithoutHooks(), size); + // We keep this next instruction out of fast_alloc for a reason: when + // it's in, and new just calls fast_alloc, the optimizer may fold the + // new call into fast_alloc, which messes up our whole section-based + // stacktracing (see ABSL_ATTRIBUTE_SECTION, above). 
This ensures fast_alloc + // isn't the last thing this fn calls, and prevents the folding. + MallocHook::InvokeNewHook(p, size); + return p; +} +#endif // TCMALLOC_ALIAS + +extern "C" void* TCMallocInternalNewArrayAligned(size_t size, + std::align_val_t alignment) +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalNewAligned); +#else +{ + return TCMallocInternalNewAligned(size, alignment); +} +#endif + +extern "C" void* TCMallocInternalNewArrayNothrow(size_t size, + const std::nothrow_t&) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalNewNothrow); +#else +{ + return fast_alloc(CppPolicy().Nothrow(), size); +} +#endif // TCMALLOC_ALIAS + +extern "C" void* TCMallocInternalNewArrayAligned_nothrow( + size_t size, std::align_val_t alignment, const std::nothrow_t&) noexcept +// Note: we use malloc rather than new, as we are allowed to return nullptr. +// The latter crashes in that case. +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalMalloc_aligned); +#else +{ + return TCMallocInternalMalloc_aligned(size, alignment); +} +#endif + +extern "C" void TCMallocInternalDeleteArray(void* p) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +extern "C" void TCMallocInternalDeleteArrayAligned( + void* p, std::align_val_t alignment) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +{ + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" void TCMallocInternalDeleteArrayNothrow( + void* p, const std::nothrow_t&) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +extern "C" void TCMallocInternalDeleteArrayAligned_nothrow( + void* p, std::align_val_t alignment, const std::nothrow_t&) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +{ + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" void* TCMallocInternalMemalign(size_t align, size_t size) noexcept { + ASSERT(align != 0); + ASSERT((align & (align - 1)) == 0); + return fast_alloc(MallocPolicy().AlignAs(align), size); +} + +extern "C" void* TCMallocInternalAlignedAlloc(size_t align, + size_t size) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalMemalign); +#else +{ + // aligned_alloc is memalign, but with the requirement that: + // align be a power of two (like memalign) + // size be a multiple of align (for the time being). 
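// The requirements listed above, shown as concrete calls. These follow
// standard C semantics; the corresponding checks appear in the function bodies
// just below.
#include <cerrno>
#include <cstdio>
#include <cstdlib>

void AlignedAllocationExamples() {
  void* a = aligned_alloc(64, 1024);       // OK: 1024 is a multiple of 64
  free(a);

  void* b = nullptr;
  int err = posix_memalign(&b, 64, 1000);  // OK: size need not be a multiple
  if (err == 0) free(b);

  void* c = nullptr;
  err = posix_memalign(&c, 3, 64);         // 3 is not a power of two, nor a
                                           // multiple of sizeof(void*)
  std::printf("posix_memalign(align=3) -> %s\n",
              err == EINVAL ? "EINVAL" : "ok");
}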
+ ASSERT(align != 0); + ASSERT(size % align == 0); + + return TCMallocInternalMemalign(align, size); +} +#endif + +extern "C" int TCMallocInternalPosixMemalign(void** result_ptr, size_t align, + size_t size) noexcept { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + void* result = fast_alloc(MallocPolicy().Nothrow().AlignAs(align), size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +static size_t pagesize = 0; + +extern "C" void* TCMallocInternalValloc(size_t size) noexcept { + // Allocate page-aligned object of length >= size bytes + if (pagesize == 0) pagesize = getpagesize(); + return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size); +} + +extern "C" void* TCMallocInternalPvalloc(size_t size) noexcept { + // Round up size to a multiple of pagesize + if (pagesize == 0) pagesize = getpagesize(); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + size = (size + pagesize - 1) & ~(pagesize - 1); + return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size); +} + +extern "C" void TCMallocInternalMallocStats(void) noexcept { + do_malloc_stats(); +} + +extern "C" int TCMallocInternalMallOpt(int cmd, int value) noexcept { + return do_mallopt(cmd, value); +} + +#ifdef HAVE_STRUCT_MALLINFO +extern "C" struct mallinfo TCMallocInternalMallocInfo(void) noexcept { + return do_mallinfo(); +} +#endif + +extern "C" size_t TCMallocInternalMallocSize(void* ptr) noexcept { + ASSERT(tcmalloc::GetOwnership(ptr) != + tcmalloc::MallocExtension::Ownership::kNotOwned); + return GetSize(ptr); +} + +// The constructor allocates an object to ensure that initialization +// runs before main(), and therefore we do not have a chance to become +// multi-threaded before initialization. We also create the TSD key +// here. Presumably by the time this constructor runs, glibc is in +// good enough shape to handle pthread_key_create(). +// +// The destructor prints stats when the program exits. +class TCMallocGuard { + public: + TCMallocGuard() { + ReplaceSystemAlloc(); // defined in libc_override_*.h + TCMallocInternalFree(TCMallocInternalMalloc(1)); + ThreadCache::InitTSD(); + TCMallocInternalFree(TCMallocInternalMalloc(1)); + } +}; + +static TCMallocGuard module_enter_exit_hook; diff --git a/tcmalloc/tcmalloc.h b/tcmalloc/tcmalloc.h new file mode 100644 index 000000000..d74e421ce --- /dev/null +++ b/tcmalloc/tcmalloc.h @@ -0,0 +1,124 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This is the exported interface from tcmalloc. For most users, +// tcmalloc just overrides existing libc functionality, and thus this +// .h file isn't needed. But we also provide the tcmalloc allocation +// routines through their own, dedicated name -- so people can wrap +// their own malloc functions around tcmalloc routines, perhaps. 
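// A minimal sketch of the kind of wrapping this header is meant to enable; the
// counter and the my_malloc/my_free names are illustrative, not part of
// tcmalloc.
#include <atomic>
#include <cstddef>

#include "tcmalloc/tcmalloc.h"

static std::atomic<size_t> g_alloc_count{0};

void* my_malloc(size_t size) {
  g_alloc_count.fetch_add(1, std::memory_order_relaxed);
  return TCMallocInternalMalloc(size);
}

void my_free(void* ptr) { TCMallocInternalFree(ptr); }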
+// These are exported here. + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include +#include + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/declarations.h" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#include +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +#ifdef __cplusplus + +extern "C" { +#endif +void* TCMallocInternalMalloc(size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalFree(void* ptr) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalSdallocx(void* ptr, size_t size, int flags) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalRealloc(void* ptr, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalCalloc(size_t n, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalCfree(void* ptr) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); + +void* TCMallocInternalAlignedAlloc(size_t align, size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalMemalign(size_t align, size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +int TCMallocInternalPosixMemalign(void** ptr, size_t align, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalValloc(size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalPvalloc(size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); + +void TCMallocInternalMallocStats(void) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +int TCMallocInternalMallOpt(int cmd, int value) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +#if !defined(OS_FREEBSD) && !defined(OS_MACOSX) + // struct mallinfo isn't defined on these platforms +struct mallinfo TCMallocInternalMallocInfo(void) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +#endif + +// This is an alias for MallocExtension::GetAllocatedSize(). 
+// It is equivalent to +// OS X: malloc_size() +// glibc: malloc_usable_size() +// Windows: _msize() +size_t TCMallocInternalMallocSize(void* ptr) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); + +#ifdef __cplusplus +void* TCMallocInternalNew(size_t size) ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewAligned(size_t size, std::align_val_t alignment) + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewNothrow(size_t size, const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDelete(void* p) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteAligned(void* p, std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteSized(void* p, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteSizedAligned(void* p, size_t t, + std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteNothrow(void* p, const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewArray(size_t size) + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewArrayAligned(size_t size, std::align_val_t alignment) + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewArrayNothrow(size_t size, + const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArray(void* p) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArrayAligned(void* p, + std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArraySized(void* p, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArraySizedAligned(void* p, size_t t, + std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +} +#endif + +#endif // TCMALLOC_TCMALLOC_H_ diff --git a/tcmalloc/tcmalloc_large_unittest.cc b/tcmalloc/tcmalloc_large_unittest.cc new file mode 100644 index 000000000..88855052c --- /dev/null +++ b/tcmalloc/tcmalloc_large_unittest.cc @@ -0,0 +1,201 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This is a unit test for large allocations in malloc and friends. +// "Large" means "so large that they overflow the address space". +// For 32 bits, this means allocations near 2^32 bytes and 2^31 bytes. +// For 64 bits, this means allocations near 2^64 bytes and 2^63 bytes. + +#include +#include +#include + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/node_hash_set.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +// Alloc a size that should always fail. 
+void TryAllocExpectFail(size_t size) { + void* p1 = malloc(size); + ASSERT_EQ(p1, nullptr); + + void* p2 = malloc(1); + ASSERT_NE(p2, nullptr); + + void* p3 = realloc(p2, size); + ASSERT_EQ(p3, nullptr); + + free(p2); +} + +// Alloc a size that might work and might fail. +// If it does work, touch some pages. + +void TryAllocMightFail(size_t size) { + unsigned char* p = static_cast(malloc(size)); + if (p != nullptr) { + unsigned char volatile* vp = p; // prevent optimizations + static const size_t kPoints = 1024; + + for ( size_t i = 0; i < kPoints; ++i ) { + vp[i * (size / kPoints)] = static_cast(i); + } + + for ( size_t i = 0; i < kPoints; ++i ) { + ASSERT_EQ(vp[i * (size / kPoints)], static_cast(i)); + } + + vp[size-1] = 'M'; + ASSERT_EQ(vp[size - 1], 'M'); + } else { + ASSERT_EQ(errno, ENOMEM); + } + + free(p); +} + +class NoErrnoRegion final : public AddressRegion { + public: + explicit NoErrnoRegion(AddressRegion* underlying) : underlying_(underlying) {} + + std::pair Alloc(size_t size, size_t alignment) override { + std::pair result = underlying_->Alloc(size, alignment); + errno = 0; + return result; + } + + private: + AddressRegion* underlying_; +}; + +class NoErrnoRegionFactory final : public AddressRegionFactory { + public: + explicit NoErrnoRegionFactory(AddressRegionFactory* underlying) + : underlying_(underlying) {} + ~NoErrnoRegionFactory() override {} + + AddressRegion* Create(void* start, size_t size, UsageHint hint) override { + AddressRegion* underlying_region = underlying_->Create(start, size, hint); + CHECK_CONDITION(underlying_region != nullptr); + void* region_space = MallocInternal(sizeof(NoErrnoRegion)); + CHECK_CONDITION(region_space != nullptr); + return new (region_space) NoErrnoRegion(underlying_region); + } + + // Get a human-readable description of the current state of the + // allocator. + size_t GetStats(absl::Span buffer) override { + return underlying_->GetStats(buffer); + } + + private: + AddressRegionFactory* const underlying_; +}; + +class LargeAllocationTest : public ::testing::Test { + public: + LargeAllocationTest() { + old_ = MallocExtension::GetRegionFactory(); + MallocExtension::SetRegionFactory(new NoErrnoRegionFactory(old_)); + + // Grab some memory so that some later allocations are guaranteed to fail. + small_ = ::operator new(4 << 20); + } + + ~LargeAllocationTest() { + ::operator delete(small_); + + auto* current = MallocExtension::GetRegionFactory(); + + MallocExtension::SetRegionFactory(old_); + delete current; + } + + private: + AddressRegionFactory* old_; + void* small_; +}; + +// Allocate some 0-byte objects. They better be unique. 0 bytes is not large +// but it exercises some paths related to large-allocation code. +TEST_F(LargeAllocationTest, UniqueAddresses) { + constexpr int kZeroTimes = 1024; + + absl::node_hash_set ptrs; + for (int i = 0; i < kZeroTimes; ++i) { + void* p = malloc(1); + ASSERT_NE(p, nullptr); + EXPECT_THAT(ptrs, ::testing::Not(::testing::Contains(p))); + ptrs.insert(p); + } + + for (auto* p : ptrs) { + free(p); + } +} + +TEST_F(LargeAllocationTest, MaxSize) { + // Test sizes up near the maximum size_t. These allocations test the + // wrap-around code. + constexpr size_t zero = 0; + constexpr size_t kMinusNTimes = 16384; + for (size_t i = 1; i < kMinusNTimes; ++i) { + TryAllocExpectFail(zero - i); + } +} + +TEST_F(LargeAllocationTest, NearMaxSize) { + // Test sizes a bit smaller. The small malloc above guarantees that all these + // return nullptr. 
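// The `zero - i` expressions in these tests rely on unsigned wrap-around:
// subtracting from size_t{0} produces values just below SIZE_MAX, i.e.
// requests that can never be satisfied. A quick check of the arithmetic:
#include <cstddef>
#include <cstdint>

static_assert(size_t{0} - 1 == SIZE_MAX, "unsigned subtraction wraps");
static_assert(size_t{0} - 1048576 - 1 == SIZE_MAX - 1048576,
              "still within ~1 MiB of SIZE_MAX");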
+ constexpr size_t zero = 0; + constexpr size_t kMinusMBMinusNTimes = 16384; + for (size_t i = 0; i < kMinusMBMinusNTimes; ++i) { + TryAllocExpectFail(zero - 1048576 - i); + } +} + +TEST_F(LargeAllocationTest, Half) { + // Test sizes at half of size_t. + // These might or might not fail to allocate. + constexpr size_t kHalfPlusMinusTimes = 64; + constexpr size_t half = std::numeric_limits::max() / 2 + 1; + for (size_t i = 0; i < kHalfPlusMinusTimes; ++i) { + TryAllocMightFail(half - i); + TryAllocMightFail(half + i); + } +} + +TEST_F(LargeAllocationTest, NearMaxAddressBits) { + // Tests sizes near the maximum address space size. + // For -1 <= i < 5, we expect all allocations to fail. For -6 <= i < -1, the + // allocation might succeed but create so much pagemap metadata that we exceed + // test memory limits and OOM. So we skip that range. + for (int i = -10; i < -6; ++i) { + TryAllocMightFail(size_t{1} << (kAddressBits + i)); + } + for (int i = -1; i < 5; ++i) { + TryAllocExpectFail(size_t{1} << (kAddressBits + i)); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/tcmalloc_policy.h b/tcmalloc/tcmalloc_policy.h new file mode 100644 index 000000000..b1d12ca83 --- /dev/null +++ b/tcmalloc/tcmalloc_policy.h @@ -0,0 +1,180 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file defines policies used when allocation memory. +// +// An allocation policy encapsulates three policies: +// +// - Out of memory policy. +// Dictates how to handle OOM conditions. +// +// struct OomPolicyTemplate { +// // Invoked when we failed to allocate memory +// // Must either terminate, throw, or return nullptr +// static void* handle_oom(size_t size); +// }; +// +// - Alignment policy +// Dictates alignment to use for an allocation. +// Must be trivially copyable. +// +// struct AlignPolicyTemplate { +// // Returns the alignment to use for the memory allocation, +// // or 1 to use small allocation table alignments (8 bytes) +// // Returned value Must be a non-zero power of 2. +// size_t align() const; +// }; +// +// - Hook invocation policy +// dictates invocation of allocation hooks +// +// struct HooksPolicyTemplate { +// // Returns true if allocation hooks must be invoked. 
+//     static bool invoke_hooks();
+//   };
+
+#ifndef TCMALLOC_TCMALLOC_POLICY_H_
+#define TCMALLOC_TCMALLOC_POLICY_H_
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cstddef>
+
+#include "tcmalloc/internal/logging.h"
+
+namespace tcmalloc {
+
+// NullOomPolicy: returns nullptr
+struct NullOomPolicy {
+  static inline constexpr void* handle_oom(size_t size) { return nullptr; }
+
+  static constexpr bool can_return_nullptr() { return true; }
+};
+
+// MallocOomPolicy: sets errno to ENOMEM and returns nullptr
+struct MallocOomPolicy {
+  static inline void* handle_oom(size_t size) {
+    errno = ENOMEM;
+    return nullptr;
+  }
+
+  static constexpr bool can_return_nullptr() { return true; }
+};
+
+// CppOomPolicy: terminates the program
+struct CppOomPolicy {
+  static ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NORETURN void* handle_oom(
+      size_t size) {
+    Log(kCrashWithStats, __FILE__, __LINE__, "Unable to allocate (new failed)",
+        size);
+    __builtin_unreachable();
+  }
+
+  static constexpr bool can_return_nullptr() { return false; }
+};
+
+// DefaultAlignPolicy: use default small size table based allocation
+struct DefaultAlignPolicy {
+  // Important: the value here is explicitly '1' to indicate that the used
+  // alignment is the default alignment of the size tables in tcmalloc.
+  // The constexpr value of 1 will optimize out the alignment checks and
+  // iterations in the GetSizeClass() calls for default aligned allocations.
+  static constexpr size_t align() { return 1; }
+};
+
+// MallocAlignPolicy: use std::max_align_t allocation
+struct MallocAlignPolicy {
+  static constexpr size_t align() { return alignof(std::max_align_t); }
+};
+
+// AlignAsPolicy: use user provided alignment
+class AlignAsPolicy {
+ public:
+  AlignAsPolicy() = delete;
+  explicit constexpr AlignAsPolicy(size_t value) : value_(value) {}
+  explicit constexpr AlignAsPolicy(std::align_val_t value)
+      : AlignAsPolicy(static_cast<size_t>(value)) {}
+
+  size_t constexpr align() const { return value_; }
+
+ private:
+  size_t value_;
+};
+
+// InvokeHooksPolicy: invoke memory allocation hooks
+struct InvokeHooksPolicy {
+  static constexpr bool invoke_hooks() { return true; }
+};
+
+// NoHooksPolicy: do not invoke memory allocation hooks
+struct NoHooksPolicy {
+  static constexpr bool invoke_hooks() { return false; }
+};
+
+// TCMallocPolicy defines the compound policy object containing
+// the OOM, alignment and hooks policies.
+// It is trivially constructible, copyable and destructible.
+template <typename OomPolicy, typename AlignPolicy, typename HooksPolicy>
+class TCMallocPolicy {
+ public:
+  constexpr TCMallocPolicy() = default;
+  explicit constexpr TCMallocPolicy(AlignPolicy align) : align_(align) {}
+
+  // OOM policy
+  static void* handle_oom(size_t size) { return OomPolicy::handle_oom(size); }
+
+  // Alignment policy
+  constexpr size_t align() const { return align_.align(); }
+
+  // Hooks policy
+  static constexpr bool invoke_hooks() { return HooksPolicy::invoke_hooks(); }
+
+  // Returns this policy aligned as 'align'
+  template <typename align_t>
+  constexpr TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy> AlignAs(
+      align_t align) const {
+    return TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy>(
+        AlignAsPolicy{align});
+  }
+
+  // Returns this policy with a nullptr OOM policy.
+  constexpr TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy> Nothrow()
+      const {
+    return TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy>(align_);
+  }
+
+  // Returns this policy with NewAllocHook invocations disabled.
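+  // For example, CppPolicy().WithoutHooks().invoke_hooks() is false, while
+  // CppPolicy().invoke_hooks() is true.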
+  constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy> WithoutHooks()
+      const {
+    return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy>(align_);
+  }
+
+  static constexpr bool can_return_nullptr() {
+    return OomPolicy::can_return_nullptr();
+  }
+
+ private:
+  AlignPolicy align_;
+};
+
+using CppPolicy =
+    TCMallocPolicy<CppOomPolicy, DefaultAlignPolicy, InvokeHooksPolicy>;
+using MallocPolicy =
+    TCMallocPolicy<MallocOomPolicy, MallocAlignPolicy, InvokeHooksPolicy>;
+
+}  // namespace tcmalloc
+
+#endif  // TCMALLOC_TCMALLOC_POLICY_H_
diff --git a/tcmalloc/testing/BUILD b/tcmalloc/testing/BUILD
new file mode 100644
index 000000000..2b65b9a67
--- /dev/null
+++ b/tcmalloc/testing/BUILD
@@ -0,0 +1,595 @@
+# Copyright 2019 The TCMalloc Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -*- mode: python; -*-
+
+# Description:
+# Tests for tcmalloc, including a performance test.
+
+load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS")
+
+licenses(["notice"])  # Apache 2.0
+
+# Several tests below test internals of malloc.
+# As such, they must disable the compiler's builtin knowledge about malloc/free,
+# e.g. that malloc has no side effects, etc.
+NO_BUILTIN_MALLOC = [
+    "-fno-builtin-malloc",
+    "-fno-builtin-free",
+    "-fno-builtin-strdup",
+]
+
+REGTEST_DEPS = [
+    ":testutil",
+    "//tcmalloc:headers_for_tests",
+    "@com_google_googletest//:gtest",
+    "@com_google_absl//absl/base",
+    "@com_google_absl//absl/base:core_headers",
+    "//tcmalloc/internal:logging",
+    "//tcmalloc/internal:parameter_accessors",
+    "@com_google_absl//absl/random",
+    "@com_google_absl//absl/random:distributions",
+    "@com_google_absl//absl/strings",
+    "@com_google_absl//absl/strings:str_format",
+    "@com_google_absl//absl/synchronization",
+    "//tcmalloc/internal:declarations",
+    "//tcmalloc:malloc_extension",
+]
+
+REGTEST_OPTS = [
+    "$(STACK_FRAME_UNLIMITED)",  # required for tcmalloc_regtest.cc
+] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS
+
+cc_library(
+    name = "testutil",
+    testonly = 1,
+    srcs = ["testutil.cc"],
+    hdrs = ["testutil.h"],
+    copts = TCMALLOC_DEFAULT_COPTS,
+    deps = [
+        "//tcmalloc:malloc_extension",
+        "@com_google_absl//absl/base:core_headers",
+    ],
+)
+
+cc_test(
+    name = "tcmalloc_regtest",
+    srcs = ["tcmalloc_regtest.cc"],
+    copts = REGTEST_OPTS,
+    linkstatic = 1,  # get the most realistic performance
+    malloc = "//tcmalloc",
+    visibility = [
+        "//visibility:private",
+    ],
+    deps = REGTEST_DEPS + ["@com_github_google_benchmark//:benchmark"],
+)
+
+cc_test(
+    name = "tcmalloc_regtest_small_but_slow",
+    srcs = ["tcmalloc_regtest.cc"],
+    copts = [
+        "-DTCMALLOC_SMALL_BUT_SLOW",
+    ] + REGTEST_OPTS,
+    linkstatic = 1,  # get the most realistic performance
+    malloc = "//tcmalloc:tcmalloc_small_but_slow",
+    deps = REGTEST_DEPS + ["@com_github_google_benchmark//:benchmark"],
+)
+
+cc_test(
+    name = "tcmalloc_regtest_large_page",
+    srcs = ["tcmalloc_regtest.cc"],
+    copts = [
+        "-DTCMALLOC_LARGE_PAGES",
+    ] + REGTEST_OPTS,
+    linkstatic = 1,  # get the most realistic performance
+    malloc = "//tcmalloc:tcmalloc_large_pages",
+    deps = REGTEST_DEPS + ["@com_github_google_benchmark//:benchmark"],
+)
+
+cc_test(
+    name = "tcmalloc_regtest_256k_pages",
+    srcs = ["tcmalloc_regtest.cc"],
copts = [ + "-DTCMALLOC_256K_PAGES", + ] + REGTEST_OPTS, + linkstatic = 1, # get the most realistic performance + malloc = "//tcmalloc:tcmalloc_256k_pages", + deps = REGTEST_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "tcmalloc_regtest_no_hp_aware", + srcs = ["tcmalloc_regtest.cc"], + copts = REGTEST_OPTS, + linkstatic = 1, # get the most realistic performance + malloc = "//tcmalloc", + deps = REGTEST_DEPS + [ + "@com_github_google_benchmark//:benchmark", + "//tcmalloc:want_no_hpaa", + ], +) + +cc_test( + name = "tcmalloc_regtest_hp_aware", + srcs = ["tcmalloc_regtest.cc"], + copts = REGTEST_OPTS, + linkstatic = 1, # get the most realistic performance + malloc = "//tcmalloc", + deps = REGTEST_DEPS + [ + "@com_github_google_benchmark//:benchmark", + "//tcmalloc:want_hpaa", + ], +) + +cc_test( + name = "tcmalloc_regtest_deprecated_perthread", + srcs = ["tcmalloc_regtest.cc"], + copts = REGTEST_OPTS + ["-DTCMALLOC_DEPRECATED_PERTHREAD"], + linkstatic = 1, # get the most realistic performance + malloc = "//tcmalloc:tcmalloc_deprecated_perthread", + deps = REGTEST_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "markidle_unittest", + srcs = ["markidle_unittest.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_deprecated_perthread", + deps = [ + ":testutil", + "//tcmalloc:malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/flags:flag", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "memalign_unittest", + srcs = ["memalign_unittest.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_googletest//:gtest_main", + ], +) + +# Fragmentation timing and unittest +cc_test( + name = "frag_unittest", + srcs = ["frag_unittest.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc", + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +LARGESMALL_FRAG_DEPS = [ + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings:str_format", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:memory_stats", + "//tcmalloc:malloc_extension", +] + +cc_test( + name = "largesmall_frag_unittest", + srcs = ["largesmall_frag_unittest.cc"], + copts = [ + "$(STACK_FRAME_UNLIMITED)", # largesmall_frag_unittest.cc + ] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = LARGESMALL_FRAG_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "largesmall_frag_unittest_256k_pages", + srcs = ["largesmall_frag_unittest.cc"], + copts = [ + "$(STACK_FRAME_UNLIMITED)", # largesmall_frag_unittest.cc + "-DTCMALLOC_256K_PAGES", + ] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_256k_pages", + deps = LARGESMALL_FRAG_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "largesmall_frag_unittest_large_page", + srcs = ["largesmall_frag_unittest.cc"], + copts = [ + "$(STACK_FRAME_UNLIMITED)", # largesmall_frag_unittest.cc + "-DTCMALLOC_LARGE_PAGES", + ] + NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_large_pages", + deps = 
LARGESMALL_FRAG_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "outofmemory_test", + srcs = ["outofmemory_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "current_allocated_bytes_test", + size = "small", + srcs = ["current_allocated_bytes_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + ], +) + +cc_test( + name = "sampling_test", + srcs = ["sampling_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":testutil", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "sampling_memusage_test", + srcs = ["sampling_memusage_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:util", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "sampling_memusage_test_small_but_slow", + srcs = ["sampling_memusage_test.cc"], + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_small_but_slow", + deps = [ + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:util", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "sampling_memusage_test_large_pages", + srcs = ["sampling_memusage_test.cc"], + copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_large_pages", + deps = [ + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:util", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "sampling_memusage_test_256k_pages", + srcs = ["sampling_memusage_test.cc"], + copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc:tcmalloc_256k_pages", + deps = [ + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:util", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest", + ], +) + +# Test of sampler class +cc_test( + name = "sampler_test", + srcs = ["sampler_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc 
= "//tcmalloc", + deps = [ + ":testutil", + "//tcmalloc", + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# Check amount of memory allocated at startup. +cc_test( + name = "startup_size_test", + srcs = ["startup_size_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "large_alloc_size_test", + srcs = ["large_alloc_size_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +# Test for b/3485510. Threads which heavily use malloc before TCMalloc's +# static initializers are invoked caused a race and crashes before cl/19792144 +# fixed that race. +cc_library( + name = "thread_ctor_unittest_lib", + testonly = 1, + srcs = ["thread_ctor_unittest_lib.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkopts = ["-pthread"], +) + +cc_test( + name = "thread_ctor_unittest", + srcs = ["thread_ctor_unittest_main.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":thread_ctor_unittest_lib", + "@com_github_google_benchmark//:benchmark", + ], +) + +cc_test( + name = "sample_size_class_test", + srcs = ["sample_size_class_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "no_deps_test", + srcs = ["no_deps_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "limit_test", + srcs = ["limit_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":testutil", + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "//tcmalloc:want_no_hpaa", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:parameter_accessors", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "limit_test_hpaa", + srcs = ["limit_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":testutil", + "//tcmalloc:headers_for_tests", + "//tcmalloc:malloc_extension", + "//tcmalloc:want_hpaa", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:parameter_accessors", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = 
"aligned_new_test", + srcs = ["aligned_new_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS + ["-faligned-new"], + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "memory_errors_test", + srcs = ["memory_errors_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc:common", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:bits", + "//tcmalloc/internal:declarations", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "get_stats_test", + srcs = ["get_stats_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":testutil", + "//tcmalloc:common", + "//tcmalloc:malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "releasing_test", + srcs = ["releasing_test.cc"], + copts = NO_BUILTIN_MALLOC + TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "//tcmalloc:common", + "//tcmalloc:malloc_extension", + "//tcmalloc:want_hpaa", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:memory_stats", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest", + ], +) + +DEFAULT_PARAMETERS_TEST_DEPS = [ + "@com_google_absl//absl/strings:str_format", + "//tcmalloc:malloc_extension", + "//tcmalloc/internal:logging", +] + +cc_test( + name = "default_parameters_test", + srcs = ["default_parameters_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = DEFAULT_PARAMETERS_TEST_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "default_parameters_test_small_but_slow", + srcs = ["default_parameters_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS + [ + "-DTCMALLOC_SMALL_BUT_SLOW", + ], + malloc = "//tcmalloc:tcmalloc_small_but_slow", + deps = DEFAULT_PARAMETERS_TEST_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "default_parameters_test_large_pages", + srcs = ["default_parameters_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS + [ + "-DTCMALLOC_LARGE_PAGES", + ], + malloc = "//tcmalloc:tcmalloc_large_pages", + deps = DEFAULT_PARAMETERS_TEST_DEPS + ["@com_github_google_benchmark//:benchmark"], +) + +cc_test( + name = "default_parameters_test_256k_pages", + srcs = ["default_parameters_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS + [ + "-DTCMALLOC_256K_PAGES", + ], + malloc = "//tcmalloc:tcmalloc_256k_pages", + deps = DEFAULT_PARAMETERS_TEST_DEPS + ["@com_github_google_benchmark//:benchmark"], +) diff --git a/tcmalloc/testing/aligned_new_test.cc b/tcmalloc/testing/aligned_new_test.cc new file mode 100644 index 000000000..c4962ad5e --- /dev/null +++ b/tcmalloc/testing/aligned_new_test.cc @@ -0,0 +1,178 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/container/flat_hash_map.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +struct Aligned4 { + int32_t a; +}; + +static_assert(alignof(Aligned4) == 4, "Unexpected alignment"); + +struct Aligned8 { + double a; + int32_t b; +}; + +static_assert(alignof(Aligned8) == 8, "Unexpected alignment"); + +struct Aligned16 { + long double a; + void* b; +}; + +static_assert(alignof(Aligned16) == 16, "Unexpected alignment"); + +struct alignas(32) Aligned32 { + int32_t a[4]; +}; + +static_assert(alignof(Aligned32) == 32, "Unexpected alignment"); + +struct alignas(64) Aligned64 { + int32_t a[8]; +} ABSL_ATTRIBUTE_PACKED; + +static_assert(alignof(Aligned64) == 64, "Unexpected alignment"); + +template +class AlignedNew : public ::testing::Test { + protected: + std::vector> ptrs; +}; + +TYPED_TEST_SUITE_P(AlignedNew); + +TYPED_TEST_P(AlignedNew, AlignedTest) { + const int kAllocations = 1 << 22; + this->ptrs.reserve(kAllocations); + + auto token = MallocExtension::StartAllocationProfiling(); + + for (int i = 0; i < kAllocations; i++) { + TypeParam* p = new TypeParam(); + benchmark::DoNotOptimize(p); + ASSERT_EQ(0, reinterpret_cast(p) & (alignof(TypeParam) - 1)); + + this->ptrs.emplace_back(p); + } + + auto profile = std::move(token).Stop(); + + // Verify the alignment was explicitly requested if alignof(TypeParam) > + // __STDCPP_DEFAULT_NEW_ALIGNMENT__. + // + // (size, alignment) -> count + using CountMap = absl::flat_hash_map, size_t>; + CountMap counts; + + profile.Iterate([&](const Profile::Sample& e) { + counts[{e.requested_size, e.requested_alignment}] += e.count; + }); + + size_t expected_alignment = 0; +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + if (alignof(TypeParam) > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { + expected_alignment = alignof(TypeParam); + } +#endif + EXPECT_GT((counts[{sizeof(TypeParam), expected_alignment}]), 0); + + ASSERT_EQ(kAllocations, this->ptrs.size()); +} + +TYPED_TEST_P(AlignedNew, SizeCheckSampling) { +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + if (alignof(TypeParam) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) { + // Allocations will not hit the sized+aligned delete path. + return; + } +#endif + + // Allocate enough objects to ensure we sample one. + const int allocations = + 32 * MallocExtension::GetProfileSamplingRate() / sizeof(TypeParam); + + for (int i = 0; i < allocations; i++) { + this->ptrs.emplace_back(new TypeParam()); + } + + ASSERT_EQ(allocations, this->ptrs.size()); + + // Trigger destruction. + this->ptrs.clear(); +} + +TYPED_TEST_P(AlignedNew, ArraySizeCheckSampling) { +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + if (alignof(TypeParam) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) { + // Allocations will not hit the sized+aligned delete path. 
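+    // (__STDCPP_DEFAULT_NEW_ALIGNMENT__ is commonly 16, so typically only the
+    // 32- and 64-byte-aligned instantiations exercise that path.)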
+ return; + } +#endif + + // NonTrival is not trivially destructible, so the compiler needs to keep + // track of the true size of arrays so that the proper number of destructors + // can be invoked. + struct NonTrivial { + virtual ~NonTrivial() {} + + TypeParam p; + }; + + static_assert(!std::is_trivially_destructible::value, + "NonTrivial should have a nontrivial destructor."); + + // Allocate enough objects to ensure we sample one. + const int allocations = + 32 * MallocExtension::GetProfileSamplingRate() / sizeof(TypeParam); + + std::vector> objects; + for (int i = 0; i < allocations; i++) { + objects.emplace_back(new NonTrivial[10]); + } + + ASSERT_EQ(allocations, objects.size()); + + // Trigger destruction. + objects.clear(); +} + +REGISTER_TYPED_TEST_SUITE_P(AlignedNew, AlignedTest, SizeCheckSampling, + ArraySizeCheckSampling); + +typedef ::testing::Types + MyTypes; +INSTANTIATE_TYPED_TEST_SUITE_P(My, AlignedNew, MyTypes); + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/current_allocated_bytes_test.cc b/tcmalloc/testing/current_allocated_bytes_test.cc new file mode 100644 index 000000000..9e805f119 --- /dev/null +++ b/tcmalloc/testing/current_allocated_bytes_test.cc @@ -0,0 +1,71 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This tests the accounting done by tcmalloc. When we allocate and +// free a small buffer, the number of bytes used by the application +// before the alloc+free should match the number of bytes used after. +// However, the internal data structures used by tcmalloc will be +// quite different -- new spans will have been allocated, etc. This +// is, thus, a simple test that we account properly for the internal +// data structures, so that we report the actual application-used +// bytes properly. + +#include +#include +#include + +#include + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +using std::max; + +const char kCurrent[] = "generic.current_allocated_bytes"; + +int main() { + size_t before_bytes = + *tcmalloc::MallocExtension::GetNumericProperty(kCurrent); + + free(malloc(200)); + + size_t after_bytes = *tcmalloc::MallocExtension::GetNumericProperty(kCurrent); + if (before_bytes != after_bytes) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, "before != after", + before_bytes, after_bytes); + } + + // Do a lot of different allocs in a lot of different size classes, + // then free them all, to make sure that the logic is correct. 
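+  // The size sequence below steps by one byte up to roughly 100 bytes and
+  // then approximately doubles each iteration, covering many size classes.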
+ void* ptrs[1000]; // how many pointers to allocate in one run + for (int size = 1; size < 1000000; size = max(size + 1, size * 2 - 100)) { + for (int cycles = 0; cycles < 2; ++cycles) { + for (int repeat = 0; repeat < sizeof(ptrs)/sizeof(*ptrs); ++repeat) { + ptrs[repeat] = malloc(size); + } + for (int repeat = 0; repeat < sizeof(ptrs)/sizeof(*ptrs); ++repeat) { + free(ptrs[repeat]); + } + } + } + + after_bytes = *tcmalloc::MallocExtension::GetNumericProperty(kCurrent); + if (before_bytes != after_bytes) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, "before != after", + before_bytes, after_bytes); + } + + printf("PASS\n"); + return 0; +} diff --git a/tcmalloc/testing/default_parameters_test.cc b/tcmalloc/testing/default_parameters_test.cc new file mode 100644 index 000000000..f0735689c --- /dev/null +++ b/tcmalloc/testing/default_parameters_test.cc @@ -0,0 +1,72 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "absl/strings/str_format.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +constexpr int64_t kDefaultProfileSamplingRate = +#ifdef TCMALLOC_SMALL_BUT_SLOW + 512 << 10 +#else + 2 << 20 +#endif + ; +constexpr int64_t kDefaultGuardedSamplingRate = 50 * kDefaultProfileSamplingRate; + +bool TestProfileSamplingRate() { + + auto extension_value = MallocExtension::GetProfileSamplingRate(); + if (extension_value != kDefaultProfileSamplingRate) { + absl::FPrintF(stderr, "ProfileSamplingRate: got %d, want %d\n", + extension_value, kDefaultProfileSamplingRate); + return false; + } + + return true; +} + +bool TestGuardedSamplingRate() { + + auto extension_value = MallocExtension::GetGuardedSamplingRate(); + if (extension_value != kDefaultGuardedSamplingRate) { + absl::FPrintF(stderr, "GuardedSamplingRate: got %d, want %d\n", + extension_value, kDefaultGuardedSamplingRate); + return false; + } + + return true; +} + +} // namespace +} // namespace tcmalloc + +int main() { + // This test has minimal dependencies, to avoid perturbing the initial + // parameters for TCMalloc. + bool success = true; + success = success & tcmalloc::TestProfileSamplingRate(); + success = success & tcmalloc::TestGuardedSamplingRate(); + + if (success) { + fprintf(stderr, "PASS"); + return 0; + } else { + return 1; + } +} diff --git a/tcmalloc/testing/frag_unittest.cc b/tcmalloc/testing/frag_unittest.cc new file mode 100644 index 000000000..307ffc44c --- /dev/null +++ b/tcmalloc/testing/frag_unittest.cc @@ -0,0 +1,71 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test speed of handling fragmented heap + +#include +#include +#include + +#include +#include + +#include "gtest/gtest.h" +#include "tcmalloc/common.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +TEST(Fragmentation, Slack) { + // Make kAllocSize one page larger than the maximum small object size. + static const int kAllocSize = kMaxSize + kPageSize; + // Allocate 400MB in total. + static const int kTotalAlloc = 400 << 20; + static const int kAllocIterations = kTotalAlloc / kAllocSize; + + // Allocate lots of objects + std::vector saved(kAllocIterations); + for (int i = 0; i < kAllocIterations; i++) { + saved[i] = ::operator new(kAllocSize); + } + + // Check the current "slack". + size_t slack_before = + *MallocExtension::GetNumericProperty("tcmalloc.slack_bytes"); + + // Free alternating ones to fragment heap + size_t free_bytes = 0; + for (int i = 0; i < saved.size(); i += 2) { + ::operator delete(saved[i]); + free_bytes += kAllocSize; + } + + // Check that slack delta is within 10% of expected. + size_t slack_after = + *MallocExtension::GetNumericProperty("tcmalloc.slack_bytes"); + ASSERT_GE(slack_after, slack_before); + size_t slack = slack_after - slack_before; + + EXPECT_GT(double(slack), 0.9 * free_bytes); + EXPECT_LT(double(slack), 1.1 * free_bytes); + + // Free remaining allocations. + for (int i = 1; i < saved.size(); i += 2) { + ::operator delete(saved[i]); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/get_stats_test.cc b/tcmalloc/testing/get_stats_test.cc new file mode 100644 index 000000000..a6c1162ca --- /dev/null +++ b/tcmalloc/testing/get_stats_test.cc @@ -0,0 +1,151 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "tcmalloc/common.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace { + +using ::testing::AnyOf; +using ::testing::ContainsRegex; +using ::testing::HasSubstr; + +class GetStatsTest : public ::testing::Test {}; + +TEST_F(GetStatsTest, Pbtxt) { + const std::string buf = GetStatsInPbTxt(); + + // Expect `buf` to be in pbtxt format. 
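+  // The assertions below spot-check individual fields with regex/substring
+  // matches rather than parsing the full text proto.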
+ EXPECT_THAT(buf, ContainsRegex(R"(in_use_by_app: [0-9]+)")); + EXPECT_THAT(buf, ContainsRegex(R"(page_heap_freelist: [0-9]+)")); + EXPECT_THAT(buf, ContainsRegex(R"(tcmalloc_huge_page_size: [0-9]+)")); +#if defined(GTEST_USES_PCRE) + EXPECT_THAT( + buf, ContainsRegex(R"(freelist\s{\s*sizeclass:\s\d+\s*bytes:\s\d+\s*})")); +#endif // defined(GTEST_USES_PCRE) + EXPECT_THAT(buf, AnyOf(HasSubstr(R"(page_heap {)"), + HasSubstr(R"(huge_page_aware {)"))); + EXPECT_THAT(buf, HasSubstr(R"(gwp_asan {)")); + + EXPECT_THAT(buf, ContainsRegex(R"(mmap_sys_allocator: [0-9]*)")); + EXPECT_THAT(buf, HasSubstr("memory_release_failures: 0")); + + if (MallocExtension::PerCpuCachesActive()) { + EXPECT_THAT(buf, ContainsRegex(R"(per_cpu_cache_freelist: [1-9][0-9]*)")); + EXPECT_THAT(buf, ContainsRegex(R"(percpu_slab_size: [1-9][0-9]*)")); + EXPECT_THAT(buf, ContainsRegex(R"(percpu_slab_residence: [1-9][0-9]*)")); + } else { + EXPECT_THAT(buf, HasSubstr("per_cpu_cache_freelist: 0")); + EXPECT_THAT(buf, HasSubstr("percpu_slab_size: 0")); + EXPECT_THAT(buf, HasSubstr("percpu_slab_residence: 0")); + } + + EXPECT_THAT(buf, HasSubstr("desired_usage_limit_bytes: -1")); + EXPECT_THAT(buf, HasSubstr("limit_hits: 0")); +} + +TEST_F(GetStatsTest, Parameters) { +#ifdef __x86_64__ + // HPAA is not enabled by default for non-x86 platforms, so we do not print + // parameters related to it (like subrelease) in these situations. + Parameters::set_hpaa_subrelease(false); +#endif + Parameters::set_guarded_sampling_rate(-1); + Parameters::set_per_cpu_caches(false); + Parameters::set_max_per_cpu_cache_size(-1); + Parameters::set_max_total_thread_cache_bytes(-1); + + { + const std::string buf = MallocExtension::GetStats(); + const std::string pbtxt = GetStatsInPbTxt(); + +#ifdef __x86_64__ + EXPECT_THAT(buf, HasSubstr(R"(PARAMETER hpaa_subrelease 0)")); +#endif + EXPECT_THAT(buf, + HasSubstr(R"(PARAMETER tcmalloc_guarded_sample_parameter -1)")); + EXPECT_THAT(buf, HasSubstr(R"(PARAMETER tcmalloc_per_cpu_caches 0)")); + EXPECT_THAT(buf, + HasSubstr(R"(PARAMETER tcmalloc_max_per_cpu_cache_size -1)")); + EXPECT_THAT( + buf, + HasSubstr(R"(PARAMETER tcmalloc_max_total_thread_cache_bytes -1)")); + +#ifdef __x86_64__ + EXPECT_THAT(pbtxt, HasSubstr(R"(using_hpaa_subrelease: false)")); +#endif + EXPECT_THAT(pbtxt, HasSubstr(R"(guarded_sample_parameter: -1)")); + EXPECT_THAT(pbtxt, HasSubstr(R"(tcmalloc_per_cpu_caches: false)")); + EXPECT_THAT(pbtxt, HasSubstr(R"(tcmalloc_max_per_cpu_cache_size: -1)")); + EXPECT_THAT(pbtxt, + HasSubstr(R"(tcmalloc_max_total_thread_cache_bytes: -1)")); + } + +#ifdef __x86_64__ + Parameters::set_hpaa_subrelease(true); +#endif + Parameters::set_guarded_sampling_rate(50 * + Parameters::profile_sampling_rate()); + Parameters::set_per_cpu_caches(true); + Parameters::set_max_per_cpu_cache_size(3 << 20); + Parameters::set_max_total_thread_cache_bytes(4 << 20); + + { + const std::string buf = MallocExtension::GetStats(); + const std::string pbtxt = GetStatsInPbTxt(); + +#ifdef __x86_64__ + EXPECT_THAT(buf, HasSubstr(R"(PARAMETER hpaa_subrelease 1)")); +#endif + EXPECT_THAT(buf, + HasSubstr(R"(PARAMETER tcmalloc_guarded_sample_parameter 50)")); + EXPECT_THAT( + buf, + HasSubstr( + R"(PARAMETER desired_usage_limit_bytes 18446744073709551615)")); + EXPECT_THAT(buf, HasSubstr(R"(PARAMETER tcmalloc_per_cpu_caches 1)")); + EXPECT_THAT( + buf, HasSubstr(R"(PARAMETER tcmalloc_max_per_cpu_cache_size 3145728)")); + EXPECT_THAT( + buf, HasSubstr( + R"(PARAMETER tcmalloc_max_total_thread_cache_bytes 4194304)")); + +#ifdef 
__x86_64__ + EXPECT_THAT(pbtxt, HasSubstr(R"(using_hpaa_subrelease: true)")); +#endif + EXPECT_THAT(pbtxt, HasSubstr(R"(guarded_sample_parameter: 50)")); + EXPECT_THAT(pbtxt, HasSubstr(R"(desired_usage_limit_bytes: -1)")); + EXPECT_THAT(pbtxt, HasSubstr(R"(hard_limit: false)")); + EXPECT_THAT(pbtxt, HasSubstr(R"(tcmalloc_per_cpu_caches: true)")); + EXPECT_THAT(pbtxt, + HasSubstr(R"(tcmalloc_max_per_cpu_cache_size: 3145728)")); + EXPECT_THAT(pbtxt, + HasSubstr(R"(tcmalloc_max_total_thread_cache_bytes: 4194304)")); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/large_alloc_size_test.cc b/tcmalloc/testing/large_alloc_size_test.cc new file mode 100644 index 000000000..6f80b5908 --- /dev/null +++ b/tcmalloc/testing/large_alloc_size_test.cc @@ -0,0 +1,54 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include +#include + +#include "gtest/gtest.h" +#include "tcmalloc/common.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +// Regression test for b/31102171. +// Ensure that when we allocate lots of kMinSystemAlloc + epsilon blocks, +// tcmalloc does not double memory consumption. +TEST(LargeAllocSizeTest, Basic) { + typedef std::map PropertyMap; + PropertyMap map = MallocExtension::GetProperties(); + const size_t start_mem = map["generic.physical_memory_used"].value; + const size_t kTotalToAllocate = 1024 << 20; + const size_t kAllocSize = tcmalloc::kMinSystemAlloc + kPageSize; + const size_t kBlocks = kTotalToAllocate / kAllocSize; + void* volatile blocks[kBlocks]; + for (size_t i = 0; i < kBlocks; ++i) { + blocks[i] = malloc(kAllocSize); + } + map = MallocExtension::GetProperties(); + const size_t end_mem = map["generic.physical_memory_used"].value; + for (size_t i = 0; i < kBlocks; ++i) { + free(blocks[i]); + } + EXPECT_LE(end_mem - start_mem, kTotalToAllocate * 1.3) + << "start: " << (start_mem >> 20) << "MB -> " + << "end: " << (end_mem >> 20) << "MB " + << "(+" << ((end_mem - start_mem) >> 20) << "MB)"; +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/largesmall_frag_unittest.cc b/tcmalloc/testing/largesmall_frag_unittest.cc new file mode 100644 index 000000000..8c00ebb36 --- /dev/null +++ b/tcmalloc/testing/largesmall_frag_unittest.cc @@ -0,0 +1,110 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
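+//
+// Exercises heap fragmentation when small long-lived allocations are
+// interleaved with large short-lived ones; see the comment above the test
+// body below.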
+ +#include +#include + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/memory_stats.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +int64_t VirtualProcessSize() { + tcmalloc::tcmalloc_internal::MemoryStats stats; + + if (tcmalloc::tcmalloc_internal::GetMemoryStats(&stats)) { + return stats.vss; + } else { + return 0; + } +} + +// This exercises memory fragmentation in the presence of calls to +// ReleaseMemoryToSystem(). +// +// We need to consider returned memory for reuse (PageHeap::large_.returned) if +// it is a better fit than a a non-returned range (PageHeap::large_.normal). +// Greedily preferring normal could cause us to fragment a larger memory block +// in the normal list when we have a better returned candidate. +// +// If we have smaller, long-lived allocations interspersed with larger, +// short-lived allocations, we might encounter fragmentation. +TEST(LargeSmallFrag, Test) { + static const int kLarge = 32 << 20; + static const int kSmall = 1000; + static const int kNumSmall = kLarge / kSmall + 10; + + // Force a small sample to initialize tagged page allocator. + constexpr int64_t kAlloc = 8192; + const int64_t num_allocs = + 32 * MallocExtension::GetProfileSamplingRate() / kAlloc; + for (int64_t i = 0; i < num_allocs; ++i) { + ::operator delete(::operator new(kAlloc)); + } + + // Chew up all possible memory that could be used to allocate + // small objects. + const int64_t vsize = VirtualProcessSize() / 1024 / 1024; + LinkedList small; + small.Init(); + while (VirtualProcessSize() / 1024 / 1024 == vsize) { + small.Push(::operator new(kSmall)); + } + + // How much memory usage should be allowed (include some slop). + // + // Note that because of vagaries of internal allocation policies, + // the retained small object may be placed in the space "reserved" + // for the second large object. That will cause the next iteration + // to allocate a third large-object space. Therefore we allow the + // virtual memory to grow to 3 * kLarge. + int64_t allowed = VirtualProcessSize() + 3*kLarge + (10 << 20); + + // Fragmentation loop + for (int iter = 0; iter < 1000; iter++) { + ::operator delete(::operator new(kLarge)); + + // Allocate some small objects and keep the middle one + void* objects[kNumSmall]; + for (int i = 0; i < kNumSmall; i++) { + objects[i] = ::operator new(kSmall); + } + for (int i = 0; i < kNumSmall; i++) { + if (i == 50) { + small.Push(objects[i]); + } else { + ::operator delete(objects[i]); + } + } + allowed += 2*kSmall; + + MallocExtension::ReleaseMemoryToSystem( + std::numeric_limits::max()); // Simulate scavenging + absl::FPrintF(stderr, "Iteration %5d ; Allowed: %d ; VSS %8.0f MB\n", iter, + allowed, VirtualProcessSize() / 1048576.0); + EXPECT_LE(VirtualProcessSize(), allowed); + } + + void* ptr; + while (small.TryPop(&ptr)) { + ::operator delete(ptr); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/limit_test.cc b/tcmalloc/testing/limit_test.cc new file mode 100644 index 000000000..4d18994b2 --- /dev/null +++ b/tcmalloc/testing/limit_test.cc @@ -0,0 +1,208 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace { + +using ::testing::ContainsRegex; +using ::testing::HasSubstr; + +void DumpHeapStats(absl::string_view label) { + std::string buffer = MallocExtension::GetStats(); + absl::FPrintF(stderr, "%s\n%s\n", label, buffer); +} + +// Fixture for friend access to MallocExtension. +class LimitTest : public ::testing::Test { + protected: + void SetLimit(size_t limit, bool is_hard) { + MallocExtension::MemoryLimit v; + v.limit = limit; + v.hard = is_hard; + MallocExtension::SetMemoryLimit(v); + } + + size_t GetLimit(bool is_hard) { + MallocExtension::MemoryLimit v = MallocExtension::GetMemoryLimit(); + if (v.hard == is_hard) { + return v.limit; + } else { + // Return no limit, as we do not have a limit matching is_hard. + return std::numeric_limits::max(); + } + } + + // avoid fragmentation in local caches + void *malloc_pages(size_t bytes) { + CHECK_CONDITION(bytes % kPageSize == 0); + void *ptr; + CHECK_CONDITION(posix_memalign(&ptr, kPageSize, bytes) == 0); + return ptr; + } + + size_t physical_memory_used() { + std::map m = + MallocExtension::GetProperties(); + auto i = m.find("generic.physical_memory_used"); + CHECK_CONDITION(i != m.end()); + return i->second.value; + } +}; + +TEST_F(LimitTest, LimitRespected) { + static const size_t kLim = 4ul * 1024 * 1024 * 1024; + SetLimit(kLim, false); + + std::string statsBuf = MallocExtension::GetStats(); + std::string statsPbtxt = GetStatsInPbTxt(); + EXPECT_THAT(statsBuf, HasSubstr(absl::StrFormat( + "PARAMETER desired_usage_limit_bytes %u", kLim))); + EXPECT_THAT(statsBuf, HasSubstr("Number of times limit was hit: 0")); + EXPECT_THAT(statsPbtxt, HasSubstr(absl::StrFormat( + "desired_usage_limit_bytes: %u", kLim))); + EXPECT_THAT(statsPbtxt, HasSubstr("hard_limit: false")); + EXPECT_THAT(statsPbtxt, HasSubstr("limit_hits: 0")); + + // Avoid failing due to usage by test itself. + static const size_t kLimForUse = kLim * 9 / 10; + // First allocate many small objects... + size_t used = 0; + std::vector ptrs; + while (used < kLimForUse) { + ptrs.push_back(malloc_pages(kPageSize)); + used += kPageSize; + } + DumpHeapStats("after allocating small objects"); + // return much of the space, fragmented... + bool ret = false; + for (auto &p : ptrs) { + if (ret) { + free(p); + p = nullptr; + used -= kPageSize; + } + ret = !ret; + } + DumpHeapStats("after freeing many small objects"); + // Now ensure we can re use it for large allocations. 
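+  // With the limit in place, these larger requests should be satisfied from
+  // the fragmented free space rather than pushing usage past kLim.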
+ + while (used < kLimForUse) { + const size_t large = kPageSize * 10; + ptrs.push_back(malloc_pages(large)); + used += large; + } + DumpHeapStats("after allocating large objects"); + EXPECT_LE(physical_memory_used(), kLim); + + statsBuf = MallocExtension::GetStats(); + statsPbtxt = GetStatsInPbTxt(); + // The HugePageAwareAllocator hits the limit more than once. + EXPECT_THAT(statsBuf, + ContainsRegex(R"(Number of times limit was hit: [1-9]\d*)")); + EXPECT_THAT(statsPbtxt, ContainsRegex(R"(limit_hits: [1-9]\d*)")); + + for (auto p : ptrs) { + free(p); + } +} + + TEST_F(LimitTest, DISABLED_HardLimitRespected) { // b/143314400 + static const size_t kLim = 300 << 20; + SetLimit(kLim, true); + + std::string statsBuf = MallocExtension::GetStats(); + std::string statsPbtxt = GetStatsInPbTxt(); + EXPECT_THAT(statsBuf, + HasSubstr(absl::StrFormat( + "PARAMETER desired_usage_limit_bytes %u (hard)", kLim))); + EXPECT_THAT(statsPbtxt, HasSubstr(absl::StrFormat( + "desired_usage_limit_bytes: %u", kLim))); + EXPECT_THAT(statsPbtxt, HasSubstr("hard_limit: true")); + + void *ptr = malloc_pages(50 << 20); + ASSERT_DEATH(malloc_pages(400 << 20), "limit"); + free(ptr); + SetLimit(std::numeric_limits::max(), false); +} + +TEST_F(LimitTest, HardLimitRespectsNoSubrelease) { + static const size_t kLim = 300 << 20; + SetLimit(kLim, true); + TCMalloc_Internal_SetHPAASubrelease(false); + EXPECT_FALSE(TCMalloc_Internal_GetHPAASubrelease()); + + std::string statsBuf = MallocExtension::GetStats(); + std::string statsPbtxt = GetStatsInPbTxt(); + EXPECT_THAT(statsBuf, + HasSubstr(absl::StrFormat( + "PARAMETER desired_usage_limit_bytes %u (hard)", kLim))); + EXPECT_THAT(statsPbtxt, HasSubstr(absl::StrFormat( + "desired_usage_limit_bytes: %u", kLim))); + EXPECT_THAT(statsPbtxt, HasSubstr("hard_limit: true")); + + ASSERT_DEATH( + []() { + // Allocate a bunch of medium objects, free half of them to cause some + // fragmentation, then allocate some large objects. If we subrelease we + // could stay under our hard limit, but if we don't then we should go + // over. + std::vector ptrs; + constexpr size_t kNumMediumObjs = 400; + constexpr size_t kNumLargeObjs = 200; + for (size_t i = 0; i < kNumMediumObjs; i++) { + ptrs.push_back(::operator new(512 << 10)); + } + DumpHeapStats("after allocating medium objects"); + for (size_t i = 0; i < ptrs.size(); i++) { + if (i % 2) continue; + ::operator delete(ptrs[i]); + ptrs[i] = static_cast(0); + } + DumpHeapStats("after freeing half of medium objects"); + for (size_t i = 0; i < kNumLargeObjs; i++) { + ptrs.push_back(::operator new(1 << 20)); + } + DumpHeapStats("after allocating large objects"); + while (!ptrs.empty()) { + ::operator delete(ptrs.back()); + ptrs.pop_back(); + } + DumpHeapStats("after freeing all objects"); + }(), + "limit"); + SetLimit(std::numeric_limits::max(), false); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/markidle_unittest.cc b/tcmalloc/testing/markidle_unittest.cc new file mode 100644 index 000000000..fb4c72467 --- /dev/null +++ b/tcmalloc/testing/markidle_unittest.cc @@ -0,0 +1,88 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// MallocExtension::MarkThreadIdle() testing +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace { + +// Helper routine to do lots of allocations +static void TestAllocation() { + static const int kNum = 1000; + void* ptr[kNum]; + for (int size = 8; size <= 65536; size*=2) { + for (int i = 0; i < kNum; i++) { + ptr[i] = malloc(size); + } + for (int i = 0; i < kNum; i++) { + free(ptr[i]); + } + } +} + +// Routine that does a bunch of MarkThreadIdle() calls in sequence +// without any intervening allocations +TEST(MarkIdleTest, MultipleIdleCalls) { + RunThread(+[]() { + for (int i = 0; i < 4; i++) { + MallocExtension::MarkThreadIdle(); + } + }); +} + +// Routine that does a bunch of MarkThreadIdle() calls in sequence +// with intervening allocations +TEST(MarkIdleTest, MultipleIdleNonIdlePhases) { + RunThread(+[]() { + for (int i = 0; i < 4; i++) { + TestAllocation(); + MallocExtension::MarkThreadIdle(); + } + }); +} + +// Get current thread cache usage +static size_t GetTotalThreadCacheSize() { + absl::optional result = MallocExtension::GetNumericProperty( + "tcmalloc.current_total_thread_cache_bytes"); + EXPECT_TRUE(result.has_value()); + return *result; +} + +// Check that MarkThreadIdle() actually reduces the amount +// of per-thread memory. +TEST(MarkIdleTest, TestIdleUsage) { + RunThread(+[]() { + const size_t original = GetTotalThreadCacheSize(); + + TestAllocation(); + const size_t post_allocation = GetTotalThreadCacheSize(); + ASSERT_GE(post_allocation, original); + + MallocExtension::MarkThreadIdle(); + const size_t post_idle = GetTotalThreadCacheSize(); + ASSERT_LE(post_idle, original); + }); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/memalign_unittest.cc b/tcmalloc/testing/memalign_unittest.cc new file mode 100644 index 000000000..6f9868eb6 --- /dev/null +++ b/tcmalloc/testing/memalign_unittest.cc @@ -0,0 +1,289 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Check memalign related routines. +// +// We can't really do a huge amount of checking, but at the very +// least, the following code checks that return values are properly +// aligned, and that writing into the objects works. 
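+//
+// The core alignment check is simply that each returned pointer value is an
+// exact multiple of the requested alignment (see CheckAlignment below).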
+ +#define _XOPEN_SOURCE 600 // to get posix_memalign +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace { + +// Return the next interesting size/delta to check. Returns -1 if no more. +int NextSize(int size) { + if (size < 100) { + return size+1; + } else if (size < 1048576) { + // Find next power of two + int power = 1; + while (power < size) { + power <<= 1; + } + + // Yield (power-1, power, power+1) + if (size < power-1) { + return power-1; + } else if (size == power-1) { + return power; + } else { + assert(size == power); + return power+1; + } + } else { + return -1; + } +} + +// Check alignment +void CheckAlignment(void* p, int align) { + ASSERT_EQ(0, reinterpret_cast(p) % align) + << "wrong alignment; wanted 0x" << std::hex << align << "; got " << p; +} + +// Fill a buffer of the specified size with a predetermined pattern +void Fill(void* p, int n, char seed) { + unsigned char* buffer = reinterpret_cast(p); + for (int i = 0; i < n; i++) { + buffer[i] = ((seed + i) & 0xff); + } +} + +// Check that the specified buffer has the predetermined pattern +// generated by Fill() +bool Valid(const void* p, int n, char seed) { + const unsigned char* buffer = reinterpret_cast(p); + for (int i = 0; i < n; i++) { + if (buffer[i] != ((seed + i) & 0xff)) { + return false; + } + } + return true; +} + +// Check that we do not fail catastrophically when we allocate a pointer with +// aligned_alloc and then realloc it. Note: realloc is not expected to +// preserve alignment. +TEST(MemalignTest, AlignedAllocRealloc) { + absl::BitGen rand; + + struct alloc { + void* ptr; + size_t size; + size_t alignment; + }; + + std::vector allocated; + for (int i = 0; i < 100; ++i) { + alloc a; + a.size = absl::LogUniform(rand, 0, 1 << 20); + a.alignment = 1 << absl::Uniform(rand, 0, 6); + + a.size = (a.size + a.alignment - 1) & ~(a.alignment - 1); + + a.ptr = aligned_alloc(a.alignment, a.size); + ASSERT_TRUE(a.ptr != nullptr); + ASSERT_EQ(0, reinterpret_cast(a.ptr) % + static_cast(a.alignment)); + allocated.emplace_back(a); + } + + for (int i = 0; i < 100; ++i) { + size_t new_size = absl::LogUniform(rand, 0, 1 << 20); + void* new_ptr = realloc(allocated[i].ptr, new_size); + ASSERT_TRUE(new_size == 0 || new_ptr != nullptr) + << allocated[i].size << " " << new_size; + allocated[i].ptr = new_ptr; + } + + for (int i = 0; i < 100; ++i) { + free(allocated[i].ptr); + } +} + +// Produces a vector of sizes to allocate, all with the specified alignment. +std::vector SizesWithAlignment(size_t align) { + std::vector v; + for (size_t s = 0; s < 100; s += align) { + v.push_back(s + align); + } + + for (size_t s = 128; s < 1048576; s *= 2) { + if (s <= align) { + continue; + } + + v.push_back(s - align); + v.push_back(s); + v.push_back(s + align); + } + + return v; +} + +TEST(MemalignTest, AlignedAlloc) { + // Try allocating data with a bunch of alignments and sizes + for (int a = 1; a < 1048576; a *= 2) { + for (auto s : SizesWithAlignment(a)) { + void* ptr = aligned_alloc(a, s); + CheckAlignment(ptr, a); + Fill(ptr, s, 'x'); + ASSERT_TRUE(Valid(ptr, s, 'x')); + free(ptr); + } + } + + // Grab some memory so that the big allocation below will definitely fail. 
+ // This allocates 4MB of RAM, therefore the request below for 2^64-4KB*i will + // fail as it cannot possibly be represented in our address space, since + // 4MB + (2^64-4KB*i) > 2^64 for i = {1...kMinusNTimes} + void* p_small = malloc(4 * 1048576); + ASSERT_NE(nullptr, p_small); + + // Make sure overflow is returned as nullptr. + const size_t zero = 0; + static const size_t kMinusNTimes = 10; + for (size_t i = 1; i < kMinusNTimes; ++i) { + EXPECT_EQ(nullptr, aligned_alloc(1024, zero - 1024 * i)); + } + + free(p_small); +} + +#ifndef NDEBUG +TEST(MemalignTest, AlignedAllocDeathTest) { + EXPECT_DEATH(aligned_alloc(0, 1), ""); + EXPECT_DEATH(aligned_alloc(sizeof(void*) + 1, 1), ""); + EXPECT_DEATH(aligned_alloc(4097, 1), ""); +} +#endif + +TEST(MemalignTest, Memalign) { + // Try allocating data with a bunch of alignments and sizes + for (int a = 1; a < 1048576; a *= 2) { + for (auto s : SizesWithAlignment(a)) { + void* ptr = memalign(a, s); + CheckAlignment(ptr, a); + Fill(ptr, s, 'x'); + ASSERT_TRUE(Valid(ptr, s, 'x')); + free(ptr); + } + } + + { + // Check various corner cases + void* p1 = memalign(1<<20, 1<<19); + void* p2 = memalign(1<<19, 1<<19); + void* p3 = memalign(1<<21, 1<<19); + CheckAlignment(p1, 1<<20); + CheckAlignment(p2, 1<<19); + CheckAlignment(p3, 1<<21); + Fill(p1, 1<<19, 'a'); + Fill(p2, 1<<19, 'b'); + Fill(p3, 1<<19, 'c'); + ASSERT_TRUE(Valid(p1, 1 << 19, 'a')); + ASSERT_TRUE(Valid(p2, 1 << 19, 'b')); + ASSERT_TRUE(Valid(p3, 1 << 19, 'c')); + free(p1); + free(p2); + free(p3); + } +} + +TEST(MemalignTest, PosixMemalign) { + // Try allocating data with a bunch of alignments and sizes + for (int a = sizeof(void*); a < 1048576; a *= 2) { + for (auto s : SizesWithAlignment(a)) { + void* ptr; + ASSERT_EQ(0, posix_memalign(&ptr, a, s)); + CheckAlignment(ptr, a); + Fill(ptr, s, 'x'); + ASSERT_TRUE(Valid(ptr, s, 'x')); + free(ptr); + } + } +} + +TEST(MemalignTest, PosixMemalignFailure) { + void* ptr; + ASSERT_EQ(posix_memalign(&ptr, 0, 1), EINVAL); + ASSERT_EQ(posix_memalign(&ptr, sizeof(void*) / 2, 1), EINVAL); + ASSERT_EQ(posix_memalign(&ptr, sizeof(void*) + 1, 1), EINVAL); + ASSERT_EQ(posix_memalign(&ptr, 4097, 1), EINVAL); + + // Grab some memory so that the big allocation below will definitely fail. 
+ void* p_small = malloc(4 * 1048576); + ASSERT_NE(p_small, nullptr); + + // Make sure overflow is returned as ENOMEM + const size_t zero = 0; + static const size_t kMinusNTimes = 10; + for (size_t i = 1; i < kMinusNTimes; ++i) { + int r = posix_memalign(&ptr, 1024, zero - i); + ASSERT_EQ(r, ENOMEM); + } + + free(p_small); +} + +TEST(MemalignTest, valloc) { + const int pagesize = getpagesize(); + + for (int s = 0; s != -1; s = NextSize(s)) { + void* p = valloc(s); + CheckAlignment(p, pagesize); + Fill(p, s, 'v'); + ASSERT_TRUE(Valid(p, s, 'v')); + free(p); + } +} + +TEST(MemalignTest, pvalloc) { + const int pagesize = getpagesize(); + + for (int s = 0; s != -1; s = NextSize(s)) { + void* p = pvalloc(s); + CheckAlignment(p, pagesize); + int alloc_needed = ((s + pagesize - 1) / pagesize) * pagesize; + Fill(p, alloc_needed, 'x'); + ASSERT_TRUE(Valid(p, alloc_needed, 'x')); + free(p); + } + + // should be safe to write upto a page in pvalloc(0) region + void* p = pvalloc(0); + Fill(p, pagesize, 'y'); + ASSERT_TRUE(Valid(p, pagesize, 'y')); + free(p); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/memory_errors_test.cc b/tcmalloc/testing/memory_errors_test.cc new file mode 100644 index 000000000..55f927565 --- /dev/null +++ b/tcmalloc/testing/memory_errors_test.cc @@ -0,0 +1,252 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/memory/memory.h" +#include "tcmalloc/common.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/bits.h" +#include "tcmalloc/internal/declarations.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace { + +class GuardedAllocAlignmentTest : public testing::Test { + protected: + GuardedAllocAlignmentTest() { + profile_sampling_rate_ = MallocExtension::GetProfileSamplingRate(); + guarded_sample_rate_ = MallocExtension::GetGuardedSamplingRate(); + MallocExtension::SetProfileSamplingRate(1); // Always do heapz samples. + MallocExtension::SetGuardedSamplingRate(0); // Guard every heapz sample. + MallocExtension::ActivateGuardedSampling(); + + // Eat up unsampled bytes remaining to flush the new sample rates. + while (true) { + void *p = ::operator new(kPageSize); + if (tcmalloc::Static::guardedpage_allocator()->PointerIsMine(p)) { + ::operator delete(p); + break; + } + ::operator delete(p); + } + + // Ensure subsequent allocations are guarded. 
+ void *p = ::operator new(1); + CHECK_CONDITION( + tcmalloc::Static::guardedpage_allocator()->PointerIsMine(p)); + ::operator delete(p); + } + + ~GuardedAllocAlignmentTest() { + MallocExtension::SetProfileSamplingRate(profile_sampling_rate_); + MallocExtension::SetGuardedSamplingRate(guarded_sample_rate_); + } + + private: + int64_t profile_sampling_rate_; + int32_t guarded_sample_rate_; +}; + +TEST_F(GuardedAllocAlignmentTest, Malloc) { + for (size_t lg = 0; lg <= kPageShift; lg++) { + size_t base_size = size_t{1} << lg; + const size_t sizes[] = {base_size - 1, base_size, base_size + 1}; + for (size_t size : sizes) { + void *p = malloc(size); + // TCMalloc currently always aligns mallocs to alignof(std::max_align_t), + // even for small sizes. If this ever changes, we can reduce the expected + // alignment here for sizes < alignof(std::max_align_t). + EXPECT_EQ(reinterpret_cast(p) % alignof(std::max_align_t), 0); + free(p); + } + } +} + +TEST_F(GuardedAllocAlignmentTest, PosixMemalign) { + for (size_t align = sizeof(void *); align <= kPageSize; align <<= 1) { + void *p = nullptr; + EXPECT_EQ(posix_memalign(&p, align, 1), 0); + EXPECT_EQ(reinterpret_cast(p) % align, 0); + benchmark::DoNotOptimize(p); + free(p); + } +} + +TEST_F(GuardedAllocAlignmentTest, New) { + for (size_t lg = 0; lg <= kPageShift; lg++) { + size_t base_size = size_t{1} << lg; + const size_t sizes[] = {base_size - 1, base_size, base_size + 1}; + for (size_t size : sizes) { + void *p = ::operator new(size); + + // In the absence of a user-specified alignment, the required alignment + // for operator new is never larger than the size rounded up to the next + // power of 2. GuardedPageAllocator uses this fact to minimize alignment + // padding between the end of small allocations and their guard pages. + int lg_size = + std::max(tcmalloc::tcmalloc_internal::Bits::Log2Ceiling(size), 0); + size_t expected_align = std::min(size_t{1} << lg_size, kAlignment); + + EXPECT_EQ(reinterpret_cast(p) % expected_align, 0); + ::operator delete(p); + } + } +} + +TEST_F(GuardedAllocAlignmentTest, AlignedNew) { + for (size_t align = 1; align <= kPageSize; align <<= 1) { + void *p = ::operator new(1, static_cast(align)); + EXPECT_EQ(reinterpret_cast(p) % align, 0); + ::operator delete(p); + } +} + +class TcMallocTest : public testing::Test { + protected: + TcMallocTest() { + MallocExtension::SetGuardedSamplingRate( + 100 * MallocExtension::GetProfileSamplingRate()); + + // Prevent SEGV handler from writing XML properties in death tests. + unsetenv("XML_OUTPUT_FILE"); + } +}; + +TEST_F(TcMallocTest, UnderflowReadDetected) { + auto RepeatUnderflowRead = []() { + for (int i = 0; i < 1000000; i++) { + auto buf = absl::make_unique(kPageSize / 2); + benchmark::DoNotOptimize(buf); + // TCMalloc may crash without a GWP-ASan report if we underflow a regular + // allocation. Make sure we have a guarded allocation. + if (tcmalloc::Static::guardedpage_allocator()->PointerIsMine(buf.get())) { + volatile char sink = buf[-1]; + benchmark::DoNotOptimize(sink); + } + } + }; + EXPECT_DEATH(RepeatUnderflowRead(), + "Buffer underflow occurs in thread [0-9]+ at"); +} + +TEST_F(TcMallocTest, OverflowReadDetected) { + auto RepeatOverflowRead = []() { + for (int i = 0; i < 1000000; i++) { + auto buf = absl::make_unique(kPageSize / 2); + benchmark::DoNotOptimize(buf); + // TCMalloc may crash without a GWP-ASan report if we overflow a regular + // allocation. Make sure we have a guarded allocation. 
+ if (tcmalloc::Static::guardedpage_allocator()->PointerIsMine(buf.get())) { + volatile char sink = buf[kPageSize / 2]; + benchmark::DoNotOptimize(sink); + } + } + }; + EXPECT_DEATH(RepeatOverflowRead(), + "Buffer overflow occurs in thread [0-9]+ at"); +} + +TEST_F(TcMallocTest, UseAfterFreeDetected) { + auto RepeatUseAfterFree = []() { + for (int i = 0; i < 1000000; i++) { + char *sink_buf = new char[kPageSize]; + benchmark::DoNotOptimize(sink_buf); + delete[] sink_buf; + volatile char sink = sink_buf[0]; + benchmark::DoNotOptimize(sink); + } + }; + EXPECT_DEATH(RepeatUseAfterFree(), + "Use-after-free occurs in thread [0-9]+ at"); +} + +// Double free triggers an ASSERT within TCMalloc in non-opt builds. So only +// run this test for opt builds. +#ifdef NDEBUG +TEST_F(TcMallocTest, DoubleFreeDetected) { + auto RepeatDoubleFree = []() { + for (int i = 0; i < 1000000; i++) { + void *buf = ::operator new(kPageSize); + ::operator delete(buf); + // TCMalloc often SEGVs on double free (without GWP-ASan report). Make + // sure we have a guarded allocation before double-freeing. + if (tcmalloc::Static::guardedpage_allocator()->PointerIsMine(buf)) { + ::operator delete(buf); + } + } + }; + EXPECT_DEATH(RepeatDoubleFree(), "Double free occurs in thread [0-9]+ at"); +} +#endif + +TEST_F(TcMallocTest, OverflowWriteDetectedAtFree) { + auto RepeatOverflowWrite = []() { + for (int i = 0; i < 1000000; i++) { + // Make buffer smaller than kPageSize to test detection-at-free of write + // overflows. + auto sink_buf = absl::make_unique(kPageSize - 1); + benchmark::DoNotOptimize(sink_buf); + sink_buf[kPageSize - 1] = '\0'; + } + }; + EXPECT_DEATH(RepeatOverflowWrite(), + "Buffer overflow \\(write\\) detected in thread [0-9]+ at free"); +} + +TEST_F(TcMallocTest, ReallocNoFalsePositive) { + for (int i = 0; i < 1000000; i++) { + auto sink_buf = reinterpret_cast(malloc(kPageSize - 1)); + benchmark::DoNotOptimize(sink_buf); + sink_buf = reinterpret_cast(realloc(sink_buf, kPageSize)); + sink_buf[kPageSize - 1] = '\0'; + benchmark::DoNotOptimize(sink_buf); + free(sink_buf); + } +} + +TEST_F(TcMallocTest, OffsetAndLength) { + auto RepeatUseAfterFree = [](size_t buffer_len, off_t access_offset) { + for (int i = 0; i < 1000000; i++) { + void *buf = ::operator new(buffer_len); + ::operator delete(buf); + // TCMalloc may crash without a GWP-ASan report if we overflow a regular + // allocation. Make sure we have a guarded allocation. + if (tcmalloc::Static::guardedpage_allocator()->PointerIsMine(buf)) { + volatile char sink = static_cast(buf)[access_offset]; + benchmark::DoNotOptimize(sink); + } + } + }; + EXPECT_DEATH(RepeatUseAfterFree(3999, -42), + ">>> Access at offset -42 into buffer of length 3999"); + EXPECT_DEATH(RepeatUseAfterFree(6543, 1221), + ">>> Access at offset 1221 into buffer of length 6543"); + EXPECT_DEATH(RepeatUseAfterFree(8192, 8484), + ">>> Access at offset 8484 into buffer of length 8192"); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/no_deps_test.cc b/tcmalloc/testing/no_deps_test.cc new file mode 100644 index 000000000..675bfbfe8 --- /dev/null +++ b/tcmalloc/testing/no_deps_test.cc @@ -0,0 +1,42 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Simple malloc test that does not depend on //base. +// Useful as the first test during tcmalloc development: first, it builds fast; +// second, it does not trigger rebuild of host binaries and regeneration of +// auto-generated files as the result a crash happens in this test rather than +// in a host binary executed from blaze. + +#include + +#include +#include + +const size_t kMem = 10 << 20; +const size_t kMin = 8; +void *blocks[kMem / kMin]; + +int main() { + size_t step = 16; + for (size_t size = kMin; size <= kMem; size += step) { + const size_t count = kMem / size; + for (size_t i = 0; i < count; i++) { + blocks[i] = malloc(size); + } + for (size_t i = 0; i < count; i++) { + free(blocks[i]); + } + step = std::max(step, size / 32); + } +} diff --git a/tcmalloc/testing/outofmemory_test.cc b/tcmalloc/testing/outofmemory_test.cc new file mode 100644 index 000000000..8d6f5ae6b --- /dev/null +++ b/tcmalloc/testing/outofmemory_test.cc @@ -0,0 +1,48 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test out of memory handling. Kept in a separate test since running out +// of memory causes other parts of the runtime to behave improperly. + +#include +#include + +#include "gtest/gtest.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace { + +class OutOfMemoryTest : public ::testing::Test { + public: + OutOfMemoryTest() { SetTestResourceLimit(); } +}; + +TEST_F(OutOfMemoryTest, TestUntilFailure) { + // Check that large allocations fail with NULL instead of crashing. + static const size_t kIncrement = 100 << 20; + static const size_t kMaxSize = ~static_cast(0); + for (size_t s = kIncrement; s < kMaxSize - kIncrement; s += kIncrement) { + SCOPED_TRACE(s); + void* large_object = malloc(s); + if (large_object == nullptr) { + return; + } + free(large_object); + } + ASSERT_TRUE(false) << "Did not run out of memory"; +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/releasing_test.cc b/tcmalloc/testing/releasing_test.cc new file mode 100644 index 000000000..2b6041cc0 --- /dev/null +++ b/tcmalloc/testing/releasing_test.cc @@ -0,0 +1,169 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This tests the memory accounting when releasing memory to the OS. As this +// requires careful memory accounting, we avoid allocating at critical times and +// avoid Google Test/background threads. + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "absl/random/random.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/memory_stats.h" +#include "tcmalloc/malloc_extension.h" + +namespace { + +int64_t GetRSS() { + tcmalloc::tcmalloc_internal::MemoryStats stats; + CHECK_CONDITION(tcmalloc::tcmalloc_internal::GetMemoryStats(&stats)); + return stats.rss; +} + +int64_t UnmappedBytes() { + absl::optional value = tcmalloc::MallocExtension::GetNumericProperty( + "tcmalloc.pageheap_unmapped_bytes"); + CHECK_CONDITION(value.has_value()); + return *value; +} + +} // namespace + +int main() { + int ret = mlockall(MCL_CURRENT | MCL_FUTURE); + if (ret != 0) { + const bool kSoftFail = true; + + if (kSoftFail) { + // Determine if we should be able to mlock memory due to our limits. + struct rlimit lock_limit; + if (getrlimit(RLIMIT_MEMLOCK, &lock_limit) != 0) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "getrlimit failed: errno", errno); + } + + if (lock_limit.rlim_cur != RLIM_INFINITY && errno == ENOMEM) { + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, + "mlockall failed: errno", errno, " mlock limit ", + lock_limit.rlim_cur); + return 0; + } + } + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "mlockall failed: errno", errno); + } + + const int kSmallAllocations = 1000; + const size_t kSmallSize = 256 * 1024; + const int kLargeAllocations = 1000; + const size_t kLargeSize = 4 * 1024 * 1024; + + std::vector ptrs; + ptrs.reserve(kSmallAllocations + kLargeAllocations); + + absl::BitGen rng; + for (int i = 0; i < kSmallAllocations; i++) { + size_t size = absl::LogUniform(rng, 0, kSmallSize); + void* ptr = ::operator new(size); + memset(ptr, 0xCD, size); + ::benchmark::DoNotOptimize(ptr); + ptrs.push_back(ptr); + } + + for (int i = 0; i < kLargeAllocations; i++) { + size_t size = absl::LogUniform(rng, kLargeSize / 2, kLargeSize); + void* ptr = ::operator new(size); + memset(ptr, 0xCD, size); + ::benchmark::DoNotOptimize(ptr); + ptrs.push_back(ptr); + } + + int64_t before, after, before_unmapped, after_unmapped; + // Release all of the memory that we can. Verify that RSS change corresponds + // to what the release logic did. + + before = GetRSS(); + before_unmapped = UnmappedBytes(); + + // Clean up. + for (void* ptr : ptrs) { + ::operator delete(ptr); + } + + // Try to release memory TCMalloc thinks it does not need. 
+ tcmalloc::MallocExtension::ReleaseMemoryToSystem(0); + after = GetRSS(); + after_unmapped = UnmappedBytes(); + + int64_t unmapped_diff = after_unmapped - before_unmapped; + int64_t memusage_diff = before - after; + if (unmapped_diff < 0) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, "Memory was mapped."); + } else if (unmapped_diff % tcmalloc::kHugePageSize != 0) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "Non-hugepage size for unmapped memory: ", unmapped_diff); + } + + // Try to release all unused memory. + + tcmalloc::MallocExtension::ReleaseMemoryToSystem( + std::numeric_limits::max()); + after = GetRSS(); + after_unmapped = UnmappedBytes(); + + unmapped_diff = after_unmapped - before_unmapped; + memusage_diff = before - after; + const double kTolerance = 5e-4; + + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, "Unmapped Memory [Before]", + before_unmapped); + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, "Unmapped Memory [After ]", + after_unmapped); + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, "Unmapped Memory [Diff ]", + after_unmapped - before_unmapped); + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, "Memory Usage [Before]", + before); + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, "Memory Usage [After ]", + after); + tcmalloc::Log(tcmalloc::kLog, __FILE__, __LINE__, "Memory Usage [Diff ]", + before - after); + + if (unmapped_diff == 0) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "No memory was unmapped."); + } + + if (unmapped_diff * (1. + kTolerance) < memusage_diff || + unmapped_diff * (1. - kTolerance) > memusage_diff) { + tcmalloc::Log(tcmalloc::kCrash, + "(after_unmapped - before_unmapped) != (before - after)", + after_unmapped - before_unmapped, before - after); + } + + printf("PASS\n"); + return 0; +} diff --git a/tcmalloc/testing/sample_size_class_test.cc b/tcmalloc/testing/sample_size_class_test.cc new file mode 100644 index 000000000..941fc3bdb --- /dev/null +++ b/tcmalloc/testing/sample_size_class_test.cc @@ -0,0 +1,104 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +// This tests that heap profiling works properly in the face of allocations +// being rounded up to the next size class. +// +// We're checking for the following bug (or something similar): +// +// Suppose client code calls malloc(17) many times. tcmalloc will round these +// allocations up to the next size class, which happens to be 32 bytes. +// +// A bug arises if the probability that tcmalloc samples a given allocation is +// a function of the requested size (17), rather than the allocated size (32). 
+// +// As part of processing profiles, we reverse the effect of sampling to get an +// approximation of the actual total usage; if we do this reversal based on +// allocated size, but the sampling was actually done on requested size, we will +// under-count these allocation. + +namespace tcmalloc { +namespace { + +// Return number of bytes of live data of size s according to the heap profile. +double HeapProfileReport(size_t s) { + double result = 0; + + auto profile = MallocExtension::SnapshotCurrent(ProfileType::kHeap); + profile.Iterate([&](const Profile::Sample& e) { + if (e.allocated_size == s) { + result += e.sum; + } + }); + + return result; +} + +TEST(SampleSizeClassTest, Main) { + // We choose a small tcmalloc sampling parameter because this reduces the + // random variance in this test's result. + MallocExtension::SetProfileSamplingRate(1024); + // Disable GWP-ASan since it doesn't use size classes. + MallocExtension::SetGuardedSamplingRate(-1); + + // Make a huge allocation that's very likely to be sampled to clear + // out the current sample point; ensures all our allocations are + // actually sampled at the above rate. + ::operator delete(::operator new(1024 * 1024 * 1024)); + + // Pick kRequestSize so that it changes significantly when it is + // rounded up by tcmalloc. If this changes, you may want to pick a + // new kRequestSize. + const size_t kRequestSize = 17; + const size_t kActualSize = 32; + void* p = malloc(kRequestSize); + EXPECT_EQ(kActualSize, MallocExtension::GetAllocatedSize(p)); + free(p); + + // Allocate a large amount of data. We construct a linked list with the + // pointers to avoid having to allocate auxillary data for keeping track of + // all of the allocations. + const double start = HeapProfileReport(kActualSize); + size_t allocated = 0; + LinkedList objs; + objs.Init(); + while (allocated < 128*1024*1024) { + // We must use the return value from malloc, otherwise the compiler may + // optimize out the call altogether! + void* ptr = malloc(kRequestSize); + EXPECT_NE(nullptr, ptr); + objs.Push(ptr); + allocated += kActualSize; + } + const double finish = HeapProfileReport(kActualSize); + + EXPECT_NEAR(allocated, finish - start, 0.05 * allocated); + + void* ptr; + while (objs.TryPop(&ptr)) { + free(ptr); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/sampler_test.cc b/tcmalloc/testing/sampler_test.cc new file mode 100755 index 000000000..ea9557cfb --- /dev/null +++ b/tcmalloc/testing/sampler_test.cc @@ -0,0 +1,489 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Checks basic properties of the sampler + +#include "tcmalloc/sampler.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/testing/testutil.h" + +// Back-door so we can access Sampler internals. 
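Before the Sampler tests, a small standalone illustration of the sampling arithmetic that sample_size_class_test.cc above depends on and that the weight checks near the end of this file verify (the program and its numbers are editorial, not part of the tests):

#include <cmath>
#include <cstdio>

int main() {
  // With sampling period T, an allocation of size s is sampled with
  // probability roughly p(s) = 1 - exp(-s / T), and each recorded sample is
  // scaled back up by about s / p(s) when the heap profile is read.
  const double T = 1024;        // the reduced period set by sample_size_class_test
  const double requested = 17;  // bytes the caller asked for
  const double allocated = 32;  // size class the request rounds up to
  auto p = [&](double s) { return 1.0 - std::exp(-s / T); };
  // If sampling were keyed off the requested size while the profile was
  // un-scaled using the allocated size, reported totals would be low by about:
  std::printf("bias factor ~= %.2f\n", p(requested) / p(allocated));  // ~0.54
  return 0;
}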
+namespace tcmalloc { + +class SamplerTest { + public: + static void Init(Sampler* s, uint64_t seed) { s->Init(seed); } +}; + +namespace { + +// Note that these tests are stochastic. +// This mean that the chance of correct code passing the test is, +// in the case of 5 standard deviations: +// kSigmas=5: ~99.99994267% +// in the case of 4 standard deviations: +// kSigmas=4: ~99.993666% +static const double kSigmas = 4; +static const size_t kSamplingInterval = + MallocExtension::GetProfileSamplingRate(); +static const size_t kGuardedSamplingInterval = 100 * kSamplingInterval; + +// Tests that GetSamplePeriod returns the expected value +// which is 1<<19 +TEST(Sampler, TestGetSamplePeriod) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + uint64_t sample_period; + sample_period = sampler.GetSamplePeriod(); + EXPECT_GT(sample_period, 0); +} + +// Tests of the quality of the random numbers generated +// This uses the Anderson Darling test for uniformity. +// See "Evaluating the Anderson-Darling Distribution" by Marsaglia +// for details. + +// Short cut version of ADinf(z), z>0 (from Marsaglia) +// This returns the p-value for Anderson Darling statistic in +// the limit as n-> infinity. For finite n, apply the error fix below. +double AndersonDarlingInf(double z) { + if (z < 2) { + return exp(-1.2337141 / z) / sqrt(z) * (2.00012 + (0.247105 - + (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 + * z) * z) * z) * z) * z); + } + return exp( - exp(1.0776 - (2.30695 - (0.43424 - (0.082433 - + (0.008056 - 0.0003146 * z) * z) * z) * z) * z)); +} + +// Corrects the approximation error in AndersonDarlingInf for small values of n +// Add this to AndersonDarlingInf to get a better approximation +// (from Marsaglia) +double AndersonDarlingErrFix(int n, double x) { + if (x > 0.8) { + return (-130.2137 + (745.2337 - (1705.091 - (1950.646 - + (1116.360 - 255.7844 * x) * x) * x) * x) * x) / n; + } + double cutoff = 0.01265 + 0.1757 / n; + double t; + if (x < cutoff) { + t = x / cutoff; + t = sqrt(t) * (1 - t) * (49 * t - 102); + return t * (0.0037 / (n * n) + 0.00078 / n + 0.00006) / n; + } else { + t = (x - cutoff) / (0.8 - cutoff); + t = -0.00022633 + (6.54034 - (14.6538 - (14.458 - (8.259 - 1.91864 + * t) * t) * t) * t) * t; + return t * (0.04213 + 0.01365 / n) / n; + } +} + +// Returns the AndersonDarling p-value given n and the value of the statistic +double AndersonDarlingPValue(int n, double z) { + double ad = AndersonDarlingInf(z); + double errfix = AndersonDarlingErrFix(n, ad); + return ad + errfix; +} + +double AndersonDarlingStatistic(const std::vector& random_sample) { + int n = random_sample.size(); + double ad_sum = 0; + for (int i = 0; i < n; i++) { + ad_sum += (2 * i + 1) * + std::log(random_sample[i] * (1 - random_sample[n - 1 - i])); + } + double ad_statistic = - n - 1/static_cast(n) * ad_sum; + return ad_statistic; +} + +// Tests if the array of doubles is uniformly distributed. +// Returns the p-value of the Anderson Darling Statistic +// for the given set of sorted random doubles +// See "Evaluating the Anderson-Darling Distribution" by +// Marsaglia and Marsaglia for details. +double AndersonDarlingTest(const std::vector& random_sample) { + double ad_statistic = AndersonDarlingStatistic(random_sample); + double p = AndersonDarlingPValue(random_sample.size(), ad_statistic); + return p; +} + +// Testing that NextRandom generates uniform +// random numbers. 
+// Applies the Anderson-Darling test for uniformity +void TestNextRandom(int n) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + uint64_t x = 1; + // This assumes that the prng returns 48 bit numbers + uint64_t max_prng_value = static_cast(1)<<48; + // Initialize + for (int i = 1; i <= 20; i++) { // 20 mimics sampler.Init() + x = sampler.NextRandom(x); + } + std::vector int_random_sample(n); + // Collect samples + for (int i = 0; i < n; i++) { + int_random_sample[i] = x; + x = sampler.NextRandom(x); + } + // First sort them... + std::sort(int_random_sample.begin(), int_random_sample.end()); + std::vector random_sample(n); + // Convert them to uniform randoms (in the range [0,1]) + for (int i = 0; i < n; i++) { + random_sample[i] = static_cast(int_random_sample[i])/max_prng_value; + } + // Now compute the Anderson-Darling statistic + double ad_pvalue = AndersonDarlingTest(random_sample); + EXPECT_GT(std::min(ad_pvalue, 1 - ad_pvalue), 0.0001) + << "prng is not uniform: n = " << n << " p = " << ad_pvalue; +} + +TEST(Sampler, TestNextRandom_MultipleValues) { + TestNextRandom(10); // Check short-range correlation + TestNextRandom(100); + TestNextRandom(1000); + TestNextRandom(10000); // Make sure there's no systematic error +} + +void TestSampleAndersonDarling(int sample_period, + std::vector* sample) { + // First sort them... + std::sort(sample->begin(), sample->end()); + int n = sample->size(); + std::vector random_sample(n); + // Convert them to uniform random numbers + // by applying the geometric CDF + for (int i = 0; i < n; i++) { + random_sample[i] = + 1 - exp(-static_cast((*sample)[i]) / sample_period); + } + // Now compute the Anderson-Darling statistic + double geom_ad_pvalue = AndersonDarlingTest(random_sample); + EXPECT_GT(std::min(geom_ad_pvalue, 1 - geom_ad_pvalue), 0.0001) + << "PickNextSamplingPoint does not produce good " + "geometric/exponential random numbers " + "n = " + << n << " p = " << geom_ad_pvalue; +} + +// Tests that PickNextSamplePeriod generates +// geometrically distributed random numbers. +// First converts to uniforms then applied the +// Anderson-Darling test for uniformity. 
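The conversion used by TestSampleAndersonDarling above, and exercised by the sampling-point tests that follow, is the probability-integral transform: if the sampling points really are (approximately) exponentially distributed with mean sample_period, applying their CDF maps them to values that are (approximately) uniform on [0, 1], which is exactly what the Anderson-Darling code above knows how to check. The transform in isolation (an illustrative helper, not used by the tests):

#include <cmath>

// If x is a draw from an exponential distribution with mean `period`, then
// u = CDF(x) = 1 - exp(-x / period) is uniformly distributed on [0, 1].
double ExponentialToUniform(double x, double period) {
  return 1.0 - std::exp(-x / period);
}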
+void TestPickNextSample(int n) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + std::vector int_random_sample(n); + int sample_period = sampler.GetSamplePeriod(); + int ones_count = 0; + for (int i = 0; i < n; i++) { + int_random_sample[i] = sampler.PickNextSamplingPoint(); + EXPECT_GE(int_random_sample[i], 1); + if (int_random_sample[i] == 1) { + ones_count += 1; + } + EXPECT_LT(ones_count, 4) << " out of " << i << " samples."; + } + TestSampleAndersonDarling(sample_period, &int_random_sample); +} + +TEST(Sampler, TestPickNextSample_MultipleValues) { + TestPickNextSample(10); // Make sure the first few are good (enough) + TestPickNextSample(100); + TestPickNextSample(1000); + TestPickNextSample(10000); // Make sure there's no systematic error +} + +void TestPickNextGuardedSample(int n) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + std::vector int_random_sample(n); + for (int i = 0; i < n; i++) { + int_random_sample[i] = 1 + sampler.PickNextGuardedSamplingPoint(); + ASSERT_GE(int_random_sample[i], 1); + } + TestSampleAndersonDarling(kGuardedSamplingInterval / kSamplingInterval, + &int_random_sample); +} + +TEST(Sampler, TestPickNextGuardedSample_MultipleValues) { + ScopedGuardedSamplingRate s(kGuardedSamplingInterval); + + TestPickNextGuardedSample(10); // Make sure the first few are good (enough) + TestPickNextGuardedSample(100); + TestPickNextGuardedSample(1000); + TestPickNextGuardedSample(10000); // Make sure there's no systematic error +} + +// Further tests + +double StandardDeviationsErrorInSample( + int total_samples, int picked_samples, + int alloc_size, int sampling_interval) { + double p = 1 - exp(-(static_cast(alloc_size) / sampling_interval)); + double expected_samples = total_samples * p; + double sd = pow(p*(1-p)*total_samples, 0.5); + return((picked_samples - expected_samples) / sd); +} + +TEST(Sampler, LargeAndSmallAllocs_CombinedTest) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + int counter_big = 0; + int counter_small = 0; + int size_big = 129*8*1024+1; + int size_small = 1024*8; + int num_iters = 128*4*8; + // Allocate in mixed chunks + for (int i = 0; i < num_iters; i++) { + if (sampler.RecordAllocation(size_big)) { + counter_big += 1; + } + for (int i = 0; i < 129; i++) { + if (sampler.RecordAllocation(size_small)) { + counter_small += 1; + } + } + } + // Now test that there are the right number of each + double large_allocs_sds = + StandardDeviationsErrorInSample(num_iters, counter_big, + size_big, kSamplingInterval); + double small_allocs_sds = + StandardDeviationsErrorInSample(num_iters*129, counter_small, + size_small, kSamplingInterval); + ASSERT_LE(fabs(large_allocs_sds), kSigmas) << large_allocs_sds; + ASSERT_LE(fabs(small_allocs_sds), kSigmas) << small_allocs_sds; +} + +TEST(Sampler, TestShouldSampleGuardedAllocation) { + ScopedGuardedSamplingRate s(kGuardedSamplingInterval); + + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + int counter = 0; + int num_iters = 10000; + for (int i = 0; i < num_iters; i++) { + if (sampler.ShouldSampleGuardedAllocation()) { + counter++; + } + } + double sd = StandardDeviationsErrorInSample( + num_iters, counter, /*alloc_size=*/1, + kGuardedSamplingInterval / kSamplingInterval); + EXPECT_LE(fabs(sd), kSigmas); +} + +template +void DoCheckMean(size_t mean, int num_samples, Body next_sampling_point) { + size_t total = 0; + for (int i = 0; i < num_samples; i++) { + total += next_sampling_point(); + } + double empirical_mean = total / 
static_cast(num_samples); + double expected_sd = mean / pow(num_samples * 1.0, 0.5); + EXPECT_LT(fabs(mean - empirical_mean), expected_sd * kSigmas); +} + +void CheckMean(size_t mean, int num_samples, bool guarded) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + DoCheckMean(mean, num_samples, [guarded, &sampler]() { + if (guarded) { + return sampler.PickNextGuardedSamplingPoint(); + } else { + return sampler.PickNextSamplingPoint(); + } + }); +} + +// Tests whether the mean is about right over 1000 samples +TEST(Sampler, IsMeanRight) { + ScopedGuardedSamplingRate s(kGuardedSamplingInterval); + + CheckMean(kSamplingInterval, 1000, /*guarded=*/false); + CheckMean(kGuardedSamplingInterval / kSamplingInterval, 1000, + /*guarded=*/true); +} + +// This checks that the stated maximum value for the sampling rate never +// overflows bytes_until_sample_ +TEST(Sampler, bytes_until_sample_Overflow_Underflow) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + uint64_t one = 1; + // sample_rate = 0; // To test the edge case + uint64_t sample_rate_array[4] = {0, 1, one << 19, one << 58}; + for (int i = 0; i < 4; i++) { + uint64_t sample_rate = sample_rate_array[i]; + SCOPED_TRACE(sample_rate); + + double sample_scaling = -std::log(2.0) * sample_rate; + // Take the top 26 bits as the random number + // (This plus the 1<<26 sampling bound give a max step possible of + // 1209424308 bytes.) + const uint64_t prng_mod_power = 48; // Number of bits in prng + + // First, check the largest_prng value + uint64_t largest_prng_value = (static_cast(1)<<48) - 1; + double q = (largest_prng_value >> (prng_mod_power - 26)) + 1.0; + uint64_t smallest_sample_step = + 1 + static_cast((std::log2(q) - 26) * sample_scaling); + uint64_t cutoff = + static_cast(10) * (sample_rate / (one << 24) + 1); + // This checks that the answer is "small" and positive + ASSERT_LE(smallest_sample_step, cutoff); + + // Next, check with the smallest prng value + uint64_t smallest_prng_value = 0; + q = (smallest_prng_value >> (prng_mod_power - 26)) + 1.0; + uint64_t largest_sample_step = + 1 + static_cast((std::log2(q) - 26) * sample_scaling); + ASSERT_LE(largest_sample_step, one << 63); + ASSERT_GE(largest_sample_step, smallest_sample_step); + } +} + + +// Test that NextRand is in the right range. Unfortunately, this is a +// stochastic test which could miss problems. 
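To put concrete numbers on the stochastic tolerance used by DoCheckMean above (an editorial illustration, using the 1 << 19 sample period mentioned in TestGetSamplePeriod's comment): the sampling points behave like exponential draws, whose standard deviation is on the order of the mean, so the empirical mean over n draws has a standard error of about mean / sqrt(n). With mean = 524288 and n = 1000 that is roughly 524288 / 31.6, i.e. about 16,600 bytes, and the kSigmas = 4 tolerance therefore lets the empirical mean drift by about 66,000 bytes before EXPECT_LT fires. The NextRand range test that follows is stochastic in the same sense.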
+TEST(Sampler, NextRand_range) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + uint64_t one = 1; + // The next number should be (one << 48) - 1 + uint64_t max_value = (one << 48) - 1; + uint64_t x = (one << 55); + int n = 22; // 27; + for (int i = 1; i <= (1<> (prng_mod_power - 26)) + 1.0; + ASSERT_GE(q, 0) << rnd << " " << prng_mod_power; + } + // Test some potentially out of bounds value for rnd + for (int i = 1; i < 64; i++) { + rnd = one << i; + double q = (rnd >> (prng_mod_power - 26)) + 1.0; + ASSERT_GE(q, 0) << " rnd=" << rnd << " i=" << i << " prng_mod_power" + << prng_mod_power; + } +} + +// Tests certain arithmetic operations to make sure they compute what we +// expect them too (for testing across different platforms) +// know bad values under with -c dbg --cpu piii for _some_ binaries: +// rnd=227453640600554 +// shifted_rnd=54229173 +// (hard to reproduce) +TEST(Sampler, arithmetic_2) { + uint64_t rnd{227453640600554}; + + const uint64_t prng_mod_power = 48; // Number of bits in prng + uint64_t shifted_rnd = rnd >> (prng_mod_power - 26); + ASSERT_LT(shifted_rnd, (1 << 26)); + ASSERT_GE(static_cast(static_cast(shifted_rnd)), 0) + << " rnd=" << rnd << " srnd=" << shifted_rnd; + ASSERT_GE(static_cast(shifted_rnd), 0) + << " rnd=" << rnd << " srnd=" << shifted_rnd; + double q = static_cast(shifted_rnd) + 1.0; + ASSERT_GT(q, 0); +} + +// It's not really a test, but it's good to know +TEST(Sampler, size_of_class) { + tcmalloc::Sampler sampler; + SamplerTest::Init(&sampler, 1); + EXPECT_LE(sizeof(sampler), 48); +} + +TEST(Sampler, stirring) { + // Lets test that we get somewhat random values from sampler even when we're + // dealing with Samplers that have same addresses, as we see when thread's TLS + // areas are reused. b/117296263 + + absl::aligned_storage_t + place; + + DoCheckMean(kSamplingInterval, 1000, [&place]() { + tcmalloc::Sampler* sampler = new (&place) tcmalloc::Sampler; + // Sampler constructor just 0-initializes + // everything. RecordAllocation really makes sampler initialize + // itself. + sampler->RecordAllocation(1); + // And then we probe sampler's (second) value. + size_t retval = sampler->PickNextSamplingPoint(); + sampler->tcmalloc::Sampler::~Sampler(); + return retval; + }); +} + +// Tests that the weights returned by RecordAllocation match the sampling rate. +TEST(Sampler, weight_distribution) { + static constexpr size_t sizes[] = { + 0, 1, 8, 198, 1024, 1152, 3712, 1 << 16, 1 << 25, 50 << 20, 1 << 30}; + + for (auto size : sizes) { + SCOPED_TRACE(size); + + tcmalloc::Sampler s; + SamplerTest::Init(&s, 1); + + static constexpr int kSamples = 10000; + double expected = + (size + 1) / (1.0 - exp(-1.0 * (size + 1) / s.GetSamplePeriod())); + // Since each sample requires ~2MiB / size iterations, using fewer samples + // for the small sizes makes this test run in ~2s vs. ~90s on Forge in 2019. + DoCheckMean(expected, size < 256 ? 100 : kSamples, [size, &s]() { + size_t weight = 0; + while (!(weight = s.RecordAllocation(size))) { + } + return weight; + }); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/sampling_memusage_test.cc b/tcmalloc/testing/sampling_memusage_test.cc new file mode 100644 index 000000000..fc3b51686 --- /dev/null +++ b/tcmalloc/testing/sampling_memusage_test.cc @@ -0,0 +1,168 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include +#include + +#include "benchmark/benchmark.h" +#include "gtest/gtest.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace { + +using tcmalloc_internal::AllowedCpus; +using tcmalloc_internal::ScopedAffinityMask; + +class SamplingMemoryTest : public ::testing::TestWithParam { + protected: + SamplingMemoryTest() { + MallocExtension::SetGuardedSamplingRate(-1); +#ifdef TCMALLOC_256K_PAGES + // For 256k pages, the sampling overhead is larger. Reduce + // the sampling period to 1<<24 + MallocExtension::SetProfileSamplingRate(1 << 24); +#endif + } + + size_t Property(absl::string_view name) { + absl::optional result = MallocExtension::GetNumericProperty(name); + CHECK_CONDITION(result.has_value()); + return *result; + } + + void SetSamplingInterval(int64_t val) { + MallocExtension::SetProfileSamplingRate(val); + // We do this to reset the per-thread sampler - it may have a + // very large gap put in here if sampling had been disabled. + ::operator delete(::operator new(1024 * 1024 * 1024)); + } + + size_t CurrentHeapSize() { + const size_t result = Property("generic.current_allocated_bytes") + + Property("tcmalloc.metadata_bytes"); + return result; + } + + // Return peak memory usage growth when allocating many "size" byte objects. + ssize_t HeapGrowth(size_t size) { + if (size < sizeof(void*)) { + size = sizeof(void*); // Must be able to fit a pointer in each object + } + + // For speed, allocate smaller number of total bytes when size is small + size_t total = 100 << 20; + if (size <= 4096) { + total = 30 << 20; + } + + constexpr int kMaxTries = 10; + + for (int i = 0; i < kMaxTries; i++) { + // We are trying to make precise measurements about the overhead of + // allocations. Keep harness-related allocations outside of our probe + // points. + // + // We pin to a CPU and trigger an allocation of the target size to ensure + // that the per-CPU slab has been initialized. + std::vector cpus = AllowedCpus(); + ScopedAffinityMask mask(cpus[0]); + + ::operator delete(::operator new(size)); + + const size_t start_memory = CurrentHeapSize(); + void* list = nullptr; + for (size_t alloc = 0; alloc < total; alloc += size) { + void** object = reinterpret_cast(::operator new(size)); + *object = list; + list = object; + } + const size_t peak_memory = CurrentHeapSize(); + + while (list != nullptr) { + void** object = reinterpret_cast(list); + list = *object; + ::operator delete(object); + } + + if (mask.Tampered()) { + continue; + } + + return peak_memory - start_memory; + } + + return 0; + } +}; + +// Check that percent memory overhead created by sampling under the +// specified allocation pattern is not too large. 
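For a sense of scale in the overhead check below (hypothetical numbers, purely illustrative): if HeapGrowth reports 30 MiB of growth with sampling disabled and 31 MiB with sampling enabled, the computed overhead is (31 - 30) * 100 / 30, i.e. about 3.3%, comfortably inside the accepted [-1%, +10%] band; the -1% floor merely tolerates measurement noise.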
+TEST_P(SamplingMemoryTest, Overhead) { + const size_t size = GetParam(); + int64_t original = MallocExtension::GetProfileSamplingRate(); + SetSamplingInterval(0); + const ssize_t baseline = HeapGrowth(size); + + SetSamplingInterval(original); + + const ssize_t with_sampling = HeapGrowth(size); + + // Allocating many MB's of memory should trigger some growth. + EXPECT_NE(baseline, 0); + EXPECT_NE(with_sampling, 0); + + const double percent = + (static_cast(with_sampling) - static_cast(baseline)) * + 100.0 / static_cast(baseline); + + // some noise is unavoidable + EXPECT_GE(percent, -1.0) << baseline << " " << with_sampling; + EXPECT_LE(percent, 10.0) << baseline << " " << with_sampling; +} + +std::vector InterestingSizes() { + std::vector ret; + + for (size_t cl = 1; cl < kNumClasses; cl++) { + size_t size = tcmalloc::Static::sizemap()->class_to_size(cl); + ret.push_back(size); + } + // Add one size not covered by sizeclasses + ret.push_back(ret.back() + 1); + return ret; +} + +INSTANTIATE_TEST_SUITE_P(AllSizeClasses, SamplingMemoryTest, + testing::ValuesIn(InterestingSizes()), + testing::PrintToStringParamName()); + +} // namespace +} // namespace tcmalloc + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tcmalloc/testing/sampling_test.cc b/tcmalloc/testing/sampling_test.cc new file mode 100644 index 000000000..7c53c5700 --- /dev/null +++ b/tcmalloc/testing/sampling_test.cc @@ -0,0 +1,168 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This tests ReadStackTraces and ReadGrowthStackTraces. It does this +// by doing a bunch of allocations and then calling those functions. +// A driver shell-script can call this, and then call pprof, and +// verify the expected output. 
The output is written to +// argv[1].heap and argv[1].growth + +#include +#include +#include +#include +#include + +#include +#include + +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/debugging/symbolize.h" +#include "absl/strings/str_cat.h" +#include "absl/types/optional.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace { + +bool StackMatches(const char *target, const void *const *stack, size_t len) { + char buf[256]; + + for (size_t i = 0; i < len; ++i) { + if (!absl::Symbolize(stack[i], buf, sizeof(buf))) continue; + if (strstr(buf, target) != nullptr) return true; + } + + return false; +} + +template +size_t CountMatchingBytes(const char *target, Profile profile) { + size_t sum = 0; + profile.Iterate([&](const Profile::Sample &e) { + if (e.requested_size == 10000 || !CheckSize) { + if (StackMatches(target, e.stack, e.depth)) { + sum += static_cast(e.sum); + } + } + }); + + return sum; +} + +ABSL_ATTRIBUTE_NOINLINE static void *AllocateAllocate(bool align) { + void* p; + if (align) { + // A 10000 byte allocation aligned to 2K will use a 10K size class + // and get 'charged' identically to malloc(10000). + CHECK_CONDITION(posix_memalign(&p, 2048, 10000) == 0); + } else { + p = malloc(10000); + } + benchmark::DoNotOptimize(p); + return p; +} + +class SamplingTest : public testing::TestWithParam {}; + +TEST_P(SamplingTest, ParamChange) { + static const size_t kIters = 80 * 1000; + std::vector allocs; + allocs.reserve(kIters * 2); + + ScopedGuardedSamplingRate gs(-1); + size_t bytes; + { + ScopedProfileSamplingRate s(GetParam()); + for (int i = 0; i < kIters; ++i) { + // Sample a mix of aligned and unaligned + allocs.push_back(AllocateAllocate(i % 20 == 0)); + } + + bytes = CountMatchingBytes( + "AllocateAllocate", + MallocExtension::SnapshotCurrent(ProfileType::kHeap)); + if (GetParam() > 0) { + EXPECT_LE(500 * 1024 * 1024, bytes); + EXPECT_GE(1000 * 1024 * 1024, bytes); + } else { + EXPECT_EQ(0, bytes); + } + } + + // We change back the samping parameter (~ScopedProfileSamplingRate above) and + // allocate more, *without* deleting the old allocs--we should sample at the + // new rate, and reweighting should correctly blend samples from before and + // after the change. 
+ for (int i = 0; i < kIters; ++i) { + allocs.push_back(AllocateAllocate(i % 20 == 0)); + } + + bytes = CountMatchingBytes( + "AllocateAllocate", MallocExtension::SnapshotCurrent(ProfileType::kHeap)); + if (GetParam() > 0) { + EXPECT_LE(1000 * 1024 * 1024, bytes); + EXPECT_GE(2000 * 1024 * 1024, bytes); + } else { + // samples that don't exist can't be reweighted properly + EXPECT_LE(500 * 1024 * 1024, bytes); + EXPECT_GE(1000 * 1024 * 1024, bytes); + } + + for (auto p : allocs) { + free(p); + } +} + +INSTANTIATE_TEST_SUITE_P(SampleParameters, SamplingTest, + testing::Values(0, 100000), + testing::PrintToStringParamName()); + +ABSL_ATTRIBUTE_NOINLINE static void *AllocateZeroByte() { + void *p = ::operator new(0); + ::benchmark::DoNotOptimize(p); + return p; +} + +TEST(Sampling, AlwaysSampling) { + ScopedGuardedSamplingRate gs(-1); + ScopedProfileSamplingRate s(1); + + static const size_t kIters = 80 * 1000; + std::vector allocs; + allocs.reserve(kIters); + for (int i = 0; i < kIters; ++i) { + allocs.push_back(AllocateZeroByte()); + } + const absl::optional alloc_size = + MallocExtension::GetAllocatedSize(allocs[0]); + ASSERT_THAT(alloc_size, testing::Ne(absl::nullopt)); + EXPECT_GT(*alloc_size, 0); + + size_t bytes = CountMatchingBytes( + "AllocateZeroByte", MallocExtension::SnapshotCurrent(ProfileType::kHeap)); + EXPECT_EQ(*alloc_size * kIters, bytes); + + for (void *p : allocs) { + ::operator delete(p); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/startup_size_test.cc b/tcmalloc/testing/startup_size_test.cc new file mode 100644 index 000000000..7b21a850f --- /dev/null +++ b/tcmalloc/testing/startup_size_test.cc @@ -0,0 +1,76 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test that the memory used by tcmalloc after the first few malloc +// calls is below a known limit to make sure no huge regression in +// startup size occurs due to a change. +// +// We intentionally do not measure RSS since that is very noisy. For +// example, if the physical memory is not fragmented much, touching a +// single byte might map in a 2MB huge page instead of 4K, which will +// cause wide variations in RSS measurements based on environmental +// conditions. + +#include +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/internal/sysinfo.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +typedef std::map PropertyMap; + +static size_t Property(const PropertyMap& map, const char* name) { + const PropertyMap::const_iterator iter = map.find(name); + if (iter == map.end()) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, "name not found", name); + } + return iter->second.value; +} + +TEST(StartupSizeTest, Basic) { + + static const size_t MiB = 1024 * 1024; + PropertyMap map = MallocExtension::GetProperties(); + ASSERT_NE(map.count("tcmalloc.metadata_bytes"), 0) + << "couldn't run - no tcmalloc data. 
Check your malloc configuration."; + size_t percpu = Property(map, "tcmalloc.cpu_free"); +#ifdef __powerpc64__ + size_t metadata_limit = 20 * MiB; +#else + size_t metadata_limit = 10.1 * MiB; +#endif + // Check whether per-cpu is active + if (percpu > 0) { + // Account for 256KiB per cpu slab + metadata_limit += absl::base_internal::NumCPUs() * 0.25 * MiB; + } + size_t meta = Property(map, "tcmalloc.metadata_bytes"); + size_t physical = Property(map, "generic.physical_memory_used"); + EXPECT_LE(meta, metadata_limit); + // Allow 20% more total physical memory than the virtual memory + // reserved for the metadata. + EXPECT_LE(physical, metadata_limit * 1.2); +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/tcmalloc_regtest.cc b/tcmalloc/testing/tcmalloc_regtest.cc new file mode 100644 index 000000000..b59bd2224 --- /dev/null +++ b/tcmalloc/testing/tcmalloc_regtest.cc @@ -0,0 +1,909 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Unittest for the TCMalloc implementation. +// +// * The test consists of a set of threads. +// * Each thread maintains a set of allocated objects, with +// a bound on the total amount of data in the set. +// * Each allocated object's contents are generated by +// hashing the object pointer, and a generation count +// in the object. This allows us to easily check for +// data corruption. +// * At any given step, the thread can do any of the following: +// a. Allocate an object +// b. Increment an object's generation count and update +// its contents. +// c. Pass the object to another thread +// d. Free an object +// Also, at the end of every step, object(s) are freed to maintain +// the memory upper-bound. + +#define _XOPEN_SOURCE 600 +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/casts.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "absl/random/random.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_format.h" +#include "absl/synchronization/mutex.h" +#include "tcmalloc/internal/declarations.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/testing/testutil.h" + +// Windows doesn't define pvalloc and a few other obsolete unix +// functions; nor does it define posix_memalign (which is not obsolete). 
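The regression test therefore defines small per-platform wrappers and a kOSSupportsMemalign flag in the #if block that follows; on platforms without memalign support the wrappers crash if ever called, so memalign-specific assertions are expected to be guarded on the flag. A hedged sketch of that guard pattern (illustrative only, not the test's actual code):

if (kOSSupportsMemalign) {
  void* p = Memalign(64, 100);  // wrapper defined below
  ASSERT_NE(p, nullptr);
  ASSERT_EQ(reinterpret_cast<uintptr_t>(p) % 64, 0);
  free(p);

  void* q = nullptr;
  ASSERT_EQ(PosixMemalign(&q, 64, 100), 0);  // wrapper defined below
  free(q);
}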
+#if defined(_WIN32) +# define cfree free +# define valloc malloc +# define pvalloc malloc +static bool kOSSupportsMemalign = false; +static inline void* Memalign(size_t align, size_t size) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "memalign not supported on windows"); +} +static inline int PosixMemalign(void** ptr, size_t align, size_t size) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "posix_memalign not supported on windows"); +} + +// OS X defines posix_memalign in some OS versions but not others; +// it's confusing enough to check that it's easiest to just not to test. +#elif defined(__APPLE__) +static bool kOSSupportsMemalign = false; +static inline void* Memalign(size_t align, size_t size) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "memalign not supported on OS X"); +} +static inline int PosixMemalign(void** ptr, size_t align, size_t size) { + tcmalloc::Log(tcmalloc::kCrash, __FILE__, __LINE__, + "posix_memalign not supported on OS X"); +} + +#else +#define OS_SUPPORTS_MEMALIGN +static bool kOSSupportsMemalign = true; +static inline void* Memalign(size_t align, size_t size) { + return memalign(align, size); +} +static inline int PosixMemalign(void** ptr, size_t align, size_t size) { + return posix_memalign(ptr, align, size); +} + +#endif + +// Testing parameters +// +// When making aligned allocations, we pick a power of two up to 1 << +// kLogMaxMemalign. +const int kLogMaxMemalign = 18; + +using testing::Contains; +using testing::ElementsAre; +using testing::HasSubstr; +using testing::SizeIs; +using testing::UnorderedElementsAre; + +static const int kSizeBits = 8 * sizeof(size_t); +static const size_t kMaxSize = ~static_cast(0); +static const size_t kMaxSignedSize = ((size_t(1) << (kSizeBits-1)) - 1); + +namespace tcmalloc { +extern bool want_hpaa(); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + SetTestResourceLimit(); + + benchmark::RunSpecifiedBenchmarks(); + + return RUN_ALL_TESTS(); +} + +namespace tcmalloc { +namespace { + +TEST(TcmallocTest, EmptyAllocations) { + // Check that empty allocation works + void* p1 = ::operator new(0); + ASSERT_NE(p1, nullptr); + void* p2 = ::operator new(0); + ASSERT_NE(p2, nullptr); + ASSERT_NE(p1, p2); + ::operator delete(p1); + ::operator delete(p2); +} + +TEST(TcmallocTest, LargeAllocation) { + // Check that "lots" of memory can be allocated + constexpr size_t kMB = 1 << 20; + ::operator delete(::operator new(100 * kMB)); +} + +TEST(TcmallocTest, Calloc) { + // Check calloc() with various arguments + + struct TestCase { + size_t n; + size_t s; + bool ok; + }; + + TestCase tests[] = { + {0, 0, true}, + {0, 1, true}, + {1, 1, true}, + {1 << 10, 0, true}, + {1 << 20, 0, true}, + {0, 1 << 10, true}, + {0, 1 << 20, true}, + {1 << 20, 2, true}, + {2, 1 << 20, true}, + {1000, 1000, true}, + {kMaxSize, 2, false}, + {2, kMaxSize, false}, + {kMaxSize, kMaxSize, false}, + {kMaxSignedSize, 3, false}, + {3, kMaxSignedSize, false}, + {kMaxSignedSize, kMaxSignedSize, false}, + }; + + for (auto t : tests) { + SCOPED_TRACE(absl::StrFormat("calloc(%x, %x)", t.n, t.s)); + + void* ptr = calloc(t.n, t.s); + benchmark::DoNotOptimize(ptr); + + EXPECT_EQ(t.ok, ptr != nullptr); + if (ptr != nullptr) { + memset(ptr, 0, t.n * t.s); + benchmark::DoNotOptimize(ptr); + } + + // This is harmless if p == nullptr. + free(ptr); + } +} + +TEST(TcmallocTest, Realloc) { + // Test that realloc doesn't always reallocate and copy memory. 
+ + // When sampling, we always allocate in units of page-size, which makes + // reallocs of small sizes do extra work (thus, failing these checks). Since + // sampling is random, we turn off sampling to make sure that doesn't happen + // to us here. + ScopedProfileSamplingRate s(0); // turn off sampling + + int start_sizes[] = {100, 1000, 10000, 100000}; + int deltas[] = {1, -2, 4, -8, 16, -32, 64, -128}; + + for (int s = 0; s < sizeof(start_sizes) / sizeof(*start_sizes); ++s) { + void* p = malloc(start_sizes[s]); + // We stash a copy of the pointer p so we can reference it later. We must + // work with the return value of p. + // + // Even if we successfully determine that realloc's return value is + // equivalent to its input value, we must use the returned value under + // penalty of UB. + const intptr_t orig_ptr = absl::bit_cast(p); + benchmark::DoNotOptimize(p); + + ASSERT_NE(p, nullptr); + // The larger the start-size, the larger the non-reallocing delta. + for (int d = 0; d < (s + 1) * 2; ++d) { + p = realloc(p, start_sizes[s] + deltas[d]); + const intptr_t new_ptr = absl::bit_cast(p); + benchmark::DoNotOptimize(p); + + ASSERT_EQ(orig_ptr, new_ptr) + << ": realloc should not allocate new memory" + << " (" << start_sizes[s] << " + " << deltas[d] << ")"; + } + // Test again, but this time reallocing smaller first. + for (int d = 0; d < s * 2; ++d) { + p = realloc(p, start_sizes[s] - deltas[d]); + const intptr_t new_ptr = absl::bit_cast(p); + benchmark::DoNotOptimize(p); + + ASSERT_EQ(orig_ptr, new_ptr) + << ": realloc should not allocate new memory" + << " (" << start_sizes[s] << " + " << -deltas[d] << ")"; + } + free(p); + } +} + +TEST(TcmallocTest, MemalignRealloc) { + constexpr size_t kDummySize = 42; + char contents[kDummySize]; + memset(contents, 0x11, kDummySize); + + void* xs[2]; + xs[0] = memalign(16, kDummySize); + ASSERT_EQ(0, posix_memalign(&xs[1], 16, kDummySize)); + + for (void* x : xs) { + memcpy(x, contents, sizeof(contents)); + + ASSERT_NE(nullptr, x); + void* y = realloc(x, 2 * kDummySize); + // Reallocating memory obtained for memalign or posix_memalign should work. + EXPECT_EQ(memcmp(contents, y, sizeof(contents)), 0); + free(y); + } +} + +TEST(TCMallocTest, HugeThreadCache) { + // Allocate more than 2^16 objects to trigger an integer overflow of 16-bit + // counters. + constexpr int kNum = 70000; + constexpr size_t kSize = 10; + std::vector arr; + arr.reserve(kNum); + + for (int i = 0; i < kNum; i++) { + arr.push_back(::operator new(kSize)); + } + + for (int i = 0; i < kNum; i++) { + ::operator delete(arr[i], kSize); + } +} + +TEST(TCMallocTest, EnormousAllocations) { + absl::BitGen rand; + + // Check that asking for stuff tiny bit smaller than largest possible + // size returns NULL. + for (size_t i = 0; i < 70000; i += absl::Uniform(rand, 1, 20)) { + const size_t size = kMaxSize - i; + void* p; + + p = malloc(size); + ASSERT_EQ(nullptr, p); + EXPECT_EQ(ENOMEM, errno); + + p = ::operator new(size, std::nothrow); + ASSERT_EQ(nullptr, p); + p = ::operator new(size, static_cast(16), std::nothrow); + ASSERT_EQ(nullptr, p); + + size_t alignment = sizeof(p) << absl::Uniform(rand, 1, kLogMaxMemalign); + ASSERT_NE(0, alignment); + ASSERT_EQ(0, alignment % sizeof(void*)); + ASSERT_EQ(0, (alignment & (alignment - 1))); + int err = PosixMemalign(&p, alignment, size); + ASSERT_EQ(ENOMEM, err); + } + + // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize) + // might work or not, depending on the amount of virtual memory. 
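The EnormousAllocations loop above builds alignments as `sizeof(p) << k` and then asserts the two properties that posix_memalign requires. A tiny standalone restatement of those invariants (illustration only, not part of the test):

```c++
#include <cstddef>

// posix_memalign requires an alignment that is a power of two and a multiple
// of sizeof(void*); any power of two >= sizeof(void*) satisfies both.
constexpr bool IsValidPosixMemalignAlignment(size_t align) {
  return align >= sizeof(void*) && (align & (align - 1)) == 0;
}

static_assert(IsValidPosixMemalignAlignment(sizeof(void*) << 3),
              "shifted pointer-sized alignments stay valid");
static_assert(!IsValidPosixMemalignAlignment(sizeof(void*) + 1),
              "values that are not powers of two are rejected");
```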
+ for (size_t i = 0; i < 100; i++) { + void* p; + p = malloc(kMaxSignedSize + i); + free(p); + p = malloc(kMaxSignedSize - i); + free(p); + } + + for (size_t i = 0; i < 100; i++) { + void* p; + p = ::operator new(kMaxSignedSize + i, std::nothrow); + ::operator delete(p); + p = ::operator new(kMaxSignedSize - i, std::nothrow); + ::operator delete(p); + } + + // Check that ReleaseMemoryToSystem has no visible effect (aka, does not crash + // the test): + MallocExtension::ReleaseMemoryToSystem(std::numeric_limits::max()); +} + +static size_t GetUnmappedBytes() { + absl::optional bytes = + MallocExtension::GetNumericProperty("tcmalloc.pageheap_unmapped_bytes"); + CHECK_CONDITION(bytes.has_value()); + return *bytes; +} + +TEST(TCMallocTest, ReleaseMemoryToSystem) { + // Similarly, the hugepage-aware allocator doesn't agree with PH about + // where release is called for. + if (tcmalloc::want_hpaa()) { + return; + } + + static const int MB = 1048576; + void* a = malloc(MB); + void* b = malloc(MB); + MallocExtension::ReleaseMemoryToSystem(std::numeric_limits::max()); + size_t starting_bytes = GetUnmappedBytes(); + + // Calling ReleaseMemoryToSystem() a second time shouldn't do anything. + MallocExtension::ReleaseMemoryToSystem(std::numeric_limits::max()); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + // ReleaseMemoryToSystem shouldn't do anything either. + MallocExtension::ReleaseMemoryToSystem(MB); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + free(a); + + // The span to release should be 1MB. + MallocExtension::ReleaseMemoryToSystem(MB / 2); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::ReleaseMemoryToSystem(MB / 4); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + free(b); + + // Use up the extra MB/4 bytes from 'a' and also release 'b'. + MallocExtension::ReleaseMemoryToSystem(MB / 2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::ReleaseMemoryToSystem(MB / 2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Nothing else to release. + MallocExtension::ReleaseMemoryToSystem(std::numeric_limits::max()); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + a = malloc(MB); + free(a); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Releasing less than a page should still trigger a release. + MallocExtension::ReleaseMemoryToSystem(1); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); +} + +TEST(TCMallocTest, NothrowSizedDelete) { + struct Foo { + double a; + }; + // Foo should correspond to a size class used by new, but not by malloc. + static_assert(sizeof(Foo) == 8, "Unexpected size for Foo"); + + static constexpr int kNum = 100; + Foo* ptrs[kNum]; + for (int i = 0; i < kNum; i++) { + ptrs[i] = new (std::nothrow) Foo; + } + for (int i = 0; i < kNum; i++) { + delete ptrs[i]; + } +} + +TEST(TCMallocTest, NothrowSizedDeleteArray) { + struct Foo { + ~Foo() {} + double a; + }; + // Foo should correspond to a size class used by new, but not by malloc, + // for some sizes k, sizeof(size_t) + sizeof(Foo) * k. (sizeof(size_t) being + // the size cookie of the implementation.) + static_assert(sizeof(Foo) == 8, "Unexpected size for Foo"); + // With a non-trivially destructible type, we expect the compiler to insert a + // size cookie so it can invoke sized delete[]. 
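The size cookie mentioned in the comment above can be made concrete with a short sketch. This assumes an Itanium-style ABI; the standard does not mandate this exact layout, so treat the arithmetic as illustrative:

```c++
#include <cstddef>

struct NonTrivial {
  ~NonTrivial() {}
  double a;
};

// For a type with a non-trivial destructor, `new NonTrivial[n]` typically
// allocates a leading size_t cookie holding n, followed by the n elements,
// so that `delete[]` can run the destructors and then call the sized
// operator delete[] with the size of the whole allocation.
constexpr size_t ExpectedArrayAllocation(size_t n) {
  return sizeof(size_t) + n * sizeof(NonTrivial);
}
static_assert(ExpectedArrayAllocation(0) == sizeof(size_t),
              "an empty array still carries the cookie");
```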
+ static_assert(!std::is_trivially_destructible::value, + "Foo should not be trivially destructable, for sized delete[]"); + + static constexpr int kNum = 100; + Foo* ptrs[kNum]; + for (int i = 0; i < kNum; i++) { + ptrs[i] = new (std::nothrow) Foo[i % 10]; + } + for (int i = 0; i < kNum; i++) { + delete[] ptrs[i]; + } +} + +TEST(TCMallocTest, MallocAlignment) { + static constexpr int kNum = 100; + + for (int lg = 0; lg < 16; lg++) { + const size_t sizes[3] = { + static_cast((1 << lg) - 1), + static_cast(1 << lg), + static_cast((1 << lg) + 1), + }; + void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)]; + int i = 0; + for (size_t size : sizes) { + for (int j = 0; j < kNum; i++, j++) { + ptrs[i] = malloc(size); + uintptr_t p = reinterpret_cast(ptrs[i]); + ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j; + } + } + + for (void* ptr : ptrs) { + free(ptr); + } + } +} + +TEST(TCMallocTest, CallocAlignment) { + static constexpr int kNum = 100; + + for (int lg = 0; lg < 16; lg++) { + const size_t sizes[3] = { + static_cast((1 << lg) - 1), + static_cast(1 << lg), + static_cast((1 << lg) + 1), + }; + void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)]; + int i = 0; + for (size_t size : sizes) { + for (int j = 0; j < kNum; i++, j++) { + ptrs[i] = calloc(size, (1 << (j % 5))); + uintptr_t p = reinterpret_cast(ptrs[i]); + ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j; + } + } + + for (void* ptr : ptrs) { + free(ptr); + } + } +} + +TEST(TCMallocTest, ReallocAlignment) { + static constexpr int kNum = 100; + + for (int lg = 0; lg < 16; lg++) { + const size_t sizes[3] = { + static_cast((1 << lg) - 1), + static_cast(1 << lg), + static_cast((1 << lg) + 1), + }; + void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)]; + int i = 0; + for (size_t size : sizes) { + for (int j = 0; j < kNum; i++, j++) { + ptrs[i] = malloc(size); + uintptr_t p = reinterpret_cast(ptrs[i]); + ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j; + + const size_t new_size = (1 << (kNum % 16)) + (kNum % 3) - 1; + void* new_ptr = realloc(ptrs[i], new_size); + if (new_ptr == nullptr) { + continue; + } + ptrs[i] = new_ptr; + + p = reinterpret_cast(new_ptr); + ASSERT_EQ(0, p % alignof(std::max_align_t)) + << size << " -> " << new_size << " " << j; + } + } + + for (void* ptr : ptrs) { + free(ptr); + } + } +} + +TEST(TCMallocTest, AlignedNew) { + absl::BitGen rand; + + struct alloc { + void* ptr; + size_t size; + std::align_val_t alignment; + }; + + std::vector allocated; + for (int i = 1; i < 100; ++i) { + alloc a; + a.size = absl::LogUniform(rand, 0, 1 << 20); + a.alignment = static_cast(1 << absl::Uniform(rand, 0, 6)); + + a.ptr = ::operator new(a.size, a.alignment); + ASSERT_NE(a.ptr, nullptr); + ASSERT_EQ(0, reinterpret_cast(a.ptr) % + static_cast(a.alignment)); + allocated.emplace_back(a); + } + for (const auto& p : allocated) { + int choice = absl::Uniform(rand, 0, 3); + + switch (choice) { + case 0: + ::operator delete(p.ptr); + break; + case 1: + ::operator delete(p.ptr, p.alignment); + break; + case 2: + ::operator delete(p.ptr, p.size, p.alignment); + break; + } + } +} + +TEST(TCMallocTest, AlignedNewArray) { + absl::BitGen rand; + + struct alloc { + void* ptr; + size_t size; + std::align_val_t alignment; + }; + + std::vector allocated; + for (int i = 1; i < 100; ++i) { + alloc a; + a.size = absl::LogUniform(rand, 0, 1 << 20); + a.alignment = static_cast(1 << absl::Uniform(rand, 0, 6)); + + a.ptr = ::operator new[](a.size, a.alignment); + ASSERT_NE(a.ptr, nullptr); + ASSERT_EQ(0, reinterpret_cast(a.ptr) % + 
static_cast(a.alignment)); + allocated.emplace_back(a); + } + for (const auto& p : allocated) { + int choice = absl::Uniform(rand, 0, 3); + + switch (choice) { + case 0: + ::operator delete[](p.ptr); + break; + case 1: + ::operator delete[](p.ptr, p.alignment); + break; + case 2: + ::operator delete[](p.ptr, p.size, p.alignment); + break; + } + } +} + +void CheckSizedDelete() { + absl::BitGen rand; + + std::vector > allocated; + for (int i = 1; i < 100; ++i) { + size_t alloc_size = absl::LogUniform(rand, 0, (1 << 20) - 1); + void* p1 = ::operator new(alloc_size); + ASSERT_NE(p1, nullptr); + allocated.push_back(std::make_pair(p1, alloc_size)); + } + for (std::vector>::const_iterator i = + allocated.begin(); + i != allocated.end(); ++i) { + ::operator delete(i->first, i->second); + } +} + +TEST(TCMallocTest, SizedDelete) { + CheckSizedDelete(); +} + +TEST(TCMallocTest, SizedDeleteSampled) { + ScopedProfileSamplingRate s(1); // Try to sample more. + CheckSizedDelete(); +} + +// Check sampled allocations return the proper size. +TEST(TCMallocTest, SampleAllocatedSize) { + ScopedProfileSamplingRate s(1); // Try to sample more. + + // Do 64 megabytes of allocation; this should (nearly) guarantee we + // get a sample. + for (int i = 0; i < 1024*1024; ++i) { + void* ptr = malloc(64); + ASSERT_EQ(64, MallocExtension::GetAllocatedSize(ptr)); + free(ptr); + } +} + +// Ensure that nallocx works before main. +struct GlobalNallocx { + GlobalNallocx() { CHECK_CONDITION(nallocx(99, 0) >= 99); } +} global_nallocx; + +#ifdef __GNUC__ +// 101 is the max user priority. +static void check_global_nallocx() __attribute__((constructor(101))); +static void check_global_nallocx() { CHECK_CONDITION(nallocx(99, 0) >= 99); } +#endif + +TEST(TCMallocTest, nallocx) { + // Guarded allocations may have a smaller allocated size than nallocx + // predicts. So we disable guarded allocations. + ScopedGuardedSamplingRate gs(-1); + + for (size_t size = 0; size <= (1 << 20); size += 7) { + size_t rounded = nallocx(size, 0); + ASSERT_GE(rounded, size); + void* ptr = operator new(size); + ASSERT_EQ(rounded, MallocExtension::GetAllocatedSize(ptr)); + operator delete(ptr); + } +} + +TEST(TCMallocTest, nallocx_alignment) { + // Guarded allocations may have a smaller allocated size than nallocx + // predicts. So we disable guarded allocations. 
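The nallocx tests here verify that the size it reports matches what the allocator actually hands back. The usual way callers exploit that guarantee is to size buffers to the reported value so no capacity is wasted inside the object. A hedged usage sketch follows; nallocx is re-declared only to keep the snippet self-contained, and a real caller should instead include the TCMalloc header that declares it:

```c++
#include <cstddef>
#include <cstdio>
#include <cstdlib>

extern "C" size_t nallocx(size_t size, int flags);  // provided by TCMalloc

void AllocateWithoutSlack(size_t requested) {
  // nallocx reports the size the allocator would round `requested` up to,
  // so asking for exactly that many bytes leaves no unusable slack.
  const size_t usable = nallocx(requested, 0);
  void* p = std::malloc(usable);
  if (p != nullptr) {
    std::printf("requested %zu bytes, usable %zu bytes\n", requested, usable);
    std::free(p);
  }
}
```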
+ ScopedGuardedSamplingRate gs(-1); + + for (size_t size = 0; size <= (1 << 20); size += 7) { + for (size_t align = 0; align < 10; align++) { + size_t rounded = nallocx(size, MALLOCX_LG_ALIGN(align)); + ASSERT_GE(rounded, size); + ASSERT_EQ(rounded % (1 << align), 0); + void* ptr = memalign(1 << align, size); + ASSERT_EQ(rounded, MallocExtension::GetAllocatedSize(ptr)); + free(ptr); + } + } +} + +TEST(TCMallocTest, sdallocx) { + for (size_t size = 0; size <= 4096; size += 7) { + void* ptr = malloc(size); + memset(ptr, 0, size); + benchmark::DoNotOptimize(ptr); + sdallocx(ptr, size, 0); + } +} + +TEST(TCMallocTest, sdallocx_alignment) { + for (size_t size = 0; size <= 4096; size += 7) { + for (size_t align = 3; align <= 10; align++) { + const size_t alignment = 1 << align; + void* ptr; + int err = PosixMemalign(&ptr, alignment, size); + ASSERT_EQ(err, 0) << alignment << " " << size; + ASSERT_EQ(reinterpret_cast(ptr) & (alignment - 1), 0); + memset(ptr, 0, size); + benchmark::DoNotOptimize(ptr); + sdallocx(ptr, size, MALLOCX_LG_ALIGN(align)); + } + } +} + +// Parse out a line like: +// : xxx bytes allocated +// Return xxx as an int, nullopt if it can't be found +absl::optional ParseLowLevelAllocator(absl::string_view allocator_name, + absl::string_view buf) { + char needlebuf[32]; + int len = + absl::SNPrintF(needlebuf, sizeof(needlebuf), "\n%s: ", allocator_name); + CHECK_CONDITION(0 < len && len < sizeof(needlebuf)); + const absl::string_view needle = needlebuf; + + auto pos = buf.find(needle); + if (pos == absl::string_view::npos) { + return absl::nullopt; + } + // skip over the prefix. Should now look like " bytes allocated". + pos += needle.size(); + buf.remove_prefix(pos); + + pos = buf.find_first_not_of(' '); + if (pos != absl::string_view::npos) { + buf.remove_prefix(pos); + } + + pos = buf.find(' '); + if (pos != absl::string_view::npos) { + buf.remove_suffix(buf.size() - pos); + } + + int64_t result; + if (!absl::SimpleAtoi(buf, &result)) { + return absl::nullopt; + } + return result; +} + +TEST(TCMallocTest, GetStatsReportsLowLevel) { + std::string stats = MallocExtension::GetStats(); + fprintf(stderr, "%s\n", stats.c_str()); + + absl::optional low_level_bytes = + ParseLowLevelAllocator("MmapSysAllocator", stats); + ASSERT_THAT(low_level_bytes, testing::Ne(absl::nullopt)); + EXPECT_GT(*low_level_bytes, 0); + size_t heap_size = + *MallocExtension::GetNumericProperty("generic.current_allocated_bytes"); + + // sanity check: we must have allocated as many bytes as in the heap + EXPECT_GE(*low_level_bytes, heap_size); +} + +#if defined(__GLIBC__) && defined(__GNUC__) && !defined(__MACH__) +namespace { +template +void ExpectSameAddresses(T1 v1, T2 v2) { + // C++ language requires a constant folding on constant inputs, + // which may result to returning false for two aliased function, + // because the aliasing is not known at this compilation unit. + // Use volatile here to enforce a runtime comparison. + volatile auto p1 = reinterpret_cast(v1); + volatile auto p2 = reinterpret_cast(v2); + const bool result = p1 == p2; + // EXPECT_EQ seems not be able to handle volatiles. 
+ EXPECT_TRUE(result); +} +} // end unnamed namespace + +TEST(TCMallocTest, TestAliasedFunctions) { + void* (*operator_new)(size_t) = &::operator new; + void* (*operator_new_nothrow)(size_t, const std::nothrow_t&) = + &::operator new; + void* (*operator_new_array)(size_t) = &::operator new[]; + void* (*operator_new_array_nothrow)(size_t, const std::nothrow_t&) = + &::operator new[]; + + ExpectSameAddresses(operator_new, operator_new_array); + ExpectSameAddresses(operator_new_nothrow, operator_new_array_nothrow); + + void (*operator_delete)(void*) = &::operator delete; + void (*operator_delete_nothrow)(void*, const std::nothrow_t&) = + &::operator delete; + void (*operator_delete_array)(void*) = &::operator delete[]; + void (*operator_delete_array_nothrow)(void*, const std::nothrow_t&) = + &::operator delete[]; + + ExpectSameAddresses(&::free, operator_delete); + ExpectSameAddresses(&::free, operator_delete_nothrow); + ExpectSameAddresses(&::free, operator_delete_array); + ExpectSameAddresses(&::free, operator_delete_array_nothrow); +} + +#endif + +TEST(TcmallocSizedNewTest, SizedOperatorNewReturnsExtraCapacity) { + // For release / no sanitizer builds, tcmalloc does return + // the next available class size, which we know is always at least + // properly aligned, so size 3 should always return extra capacity. + sized_ptr_t res = tcmalloc_size_returning_operator_new(3); + EXPECT_THAT(res.n, testing::Ge(8)); + ::operator delete(res.p); +} + +TEST(TcmallocSizedNewTest, NothrowSizedOperatorNewReturnsExtraCapacity) { + // For release / no sanitizer builds, tcmalloc does return + // the next available class size, which we know is always at least + // properly aligned, so size 3 should always return extra capacity. + sized_ptr_t res = tcmalloc_size_returning_operator_new_nothrow(3); + EXPECT_THAT(res.n, testing::Ge(8)); + ::operator delete(res.p); +} + +TEST(TcmallocSizedNewTest, SizedOperatorNew) { + for (size_t size = 0; size < 1024; ++size) { + sized_ptr_t res = tcmalloc_size_returning_operator_new(size); + EXPECT_NE(res.p, nullptr); + EXPECT_GE(res.n, size); + EXPECT_LE(size, std::max(size + 100, 2 * size)); + benchmark::DoNotOptimize(memset(res.p, 0xBF, res.n)); + ::operator delete(res.p); + } +} + +TEST(TcmallocSizedNewTest, NothrowSizedOperatorNew) { + for (size_t size = 0; size < 64 * 1024; ++size) { + sized_ptr_t res = tcmalloc_size_returning_operator_new_nothrow(size); + EXPECT_NE(res.p, nullptr); + EXPECT_GE(res.n, size); + EXPECT_LE(size, std::max(size + 100, 2 * size)); + benchmark::DoNotOptimize(memset(res.p, 0xBF, res.n)); + ::operator delete(res.p); + } +} + +TEST(TcmallocSizedNewTest, InvalidSizedOperatorNewAlwaysFails) { + constexpr size_t kBadSize = std::numeric_limits::max(); + EXPECT_DEATH(tcmalloc_size_returning_operator_new(kBadSize), ".*"); +} + +TEST(TcmallocSizedNewTest, InvalidNothrowSizedOperatorNew) { + constexpr size_t kBadSize = std::numeric_limits::max(); + sized_ptr_t res = tcmalloc_size_returning_operator_new_nothrow(kBadSize); + EXPECT_EQ(res.p, nullptr); + EXPECT_EQ(res.n, 0); +} + +TEST(TcmallocSizedNewTest, SizedOperatorNewMatchesMallocExtensionValue) { + // Set reasonable sampling and guarded sampling probabilities. 
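The size-returning tests above are the point of the `sized_ptr_t` interface: the caller learns both the pointer and the capacity it actually received. A usage sketch, assuming only the declarations the test file itself includes (the helper name is hypothetical):

```c++
#include <cstddef>
#include <cstring>

// Hypothetical helper: allocate, use the full capacity reported in .n, and
// return the object with sized delete using that same capacity.
size_t FillToCapacity(size_t requested) {
  sized_ptr_t res = tcmalloc_size_returning_operator_new(requested);
  std::memset(res.p, 0, res.n);     // every byte up to .n is usable
  const size_t capacity = res.n;
  ::operator delete(res.p, res.n);  // the capacity is a valid size for delete
  return capacity;
}
```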
+ ScopedProfileSamplingRate s(20); + ScopedGuardedSamplingRate gs(20); + constexpr size_t kOddIncrement = 117; + + // Traverse clean power 2 / common size class / page sizes + for (size_t size = 32; size <= 2 * 1024 * 1024; size *= 2) { + sized_ptr_t r = tcmalloc_size_returning_operator_new(size); + ASSERT_EQ(r.n, MallocExtension::GetAllocatedSize(r.p)); + ::operator delete(r.p, r.n); + } + + // Traverse randomized sizes + for (size_t size = 32; size <= 2 * 1024 * 1024; size += kOddIncrement) { + sized_ptr_t r = tcmalloc_size_returning_operator_new(size); + ASSERT_EQ(r.n, MallocExtension::GetAllocatedSize(r.p)); + ::operator delete(r.p, r.n); + } +} + +TEST(SizedDeleteTest, SizedOperatorDelete) { + enum DeleteSize { kSize, kCapacity, kHalfway }; + for (size_t size = 0; size < 64 * 1024; ++size) { + for (auto delete_size : {kSize, kCapacity, kHalfway}) { + sized_ptr_t res = tcmalloc_size_returning_operator_new(size); + switch (delete_size) { + case kSize: + ::operator delete(res.p, size); + break; + case kCapacity: + ::operator delete(res.p, res.n); + break; + case kHalfway: + ::operator delete(res.p, (size + res.n) / 2); + break; + } + } + } +} + +TEST(SizedDeleteTest, NothrowSizedOperatorDelete) { + for (size_t size = 0; size < 64 * 1024; ++size) { + sized_ptr_t res = tcmalloc_size_returning_operator_new(size); + ::operator delete(res.p, std::nothrow); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/tcmalloc/testing/testutil.cc b/tcmalloc/testing/testutil.cc new file mode 100644 index 000000000..f1cc94b49 --- /dev/null +++ b/tcmalloc/testing/testutil.cc @@ -0,0 +1,137 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A few routines that are useful for multiple tests in this directory. + +#include "tcmalloc/testing/testutil.h" + +#include +#include +#include + +#include +#include + +#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) + +extern "C" { + struct FunctionAndId { + void (*ptr_to_function)(int); + int id; + }; + +// This helper function has the signature that pthread_create wants. + static void* RunFunctionInThread(void *ptr_to_ptr_to_fn) { + (**static_cast(ptr_to_ptr_to_fn))(); // runs fn + return nullptr; + } + + static void* RunFunctionInThreadWithId(void *ptr_to_fnid) { + FunctionAndId* fn_and_id = static_cast(ptr_to_fnid); + (*fn_and_id->ptr_to_function)(fn_and_id->id); // runs fn + return nullptr; + } + + // Run a function in a thread of its own and wait for it to finish. + // This is useful for tcmalloc testing, because each thread is + // handled separately in tcmalloc, so there's interesting stuff to + // test even if the threads are not running concurrently. + void RunThread(void (*fn)()) { + pthread_t thr; + // Even though fn is on the stack, it's safe to pass a pointer to it, + // because we pthread_join immediately (ie, before RunInThread exits). 
+ SAFE_PTHREAD(pthread_create(&thr, nullptr, RunFunctionInThread, &fn)); + SAFE_PTHREAD(pthread_join(thr, nullptr)); + } + + void RunManyThreads(void (*fn)(), int count) { + pthread_t* thr = new pthread_t[count]; + for (int i = 0; i < count; i++) { + SAFE_PTHREAD(pthread_create(&thr[i], nullptr, RunFunctionInThread, &fn)); + } + for (int i = 0; i < count; i++) { + SAFE_PTHREAD(pthread_join(thr[i], nullptr)); + } + delete[] thr; + } + + void RunManyThreadsWithId(void (*fn)(int), int count, int stacksize) { + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, stacksize); + + pthread_t* thr = new pthread_t[count]; + FunctionAndId* fn_and_ids = new FunctionAndId[count]; + for (int i = 0; i < count; i++) { + fn_and_ids[i].ptr_to_function = fn; + fn_and_ids[i].id = i; + SAFE_PTHREAD(pthread_create(&thr[i], &attr, + RunFunctionInThreadWithId, &fn_and_ids[i])); + } + for (int i = 0; i < count; i++) { + SAFE_PTHREAD(pthread_join(thr[i], nullptr)); + } + delete[] fn_and_ids; + delete[] thr; + + pthread_attr_destroy(&attr); + } +} + + +// When compiled 64-bit and run on systems with swap several unittests will end +// up trying to consume all of RAM+swap, and that can take quite some time. By +// limiting the address-space size we get sufficient coverage without blowing +// out job limits. +void SetTestResourceLimit() { + + // The actual resource we need to set varies depending on which flavour of + // unix. On Linux we need RLIMIT_AS because that covers the use of mmap. + // Otherwise hopefully RLIMIT_RSS is good enough. (Unfortunately 64-bit + // and 32-bit headers disagree on the type of these constants!) +#ifdef RLIMIT_AS +#define USE_RESOURCE RLIMIT_AS +#else +#define USE_RESOURCE RLIMIT_RSS +#endif + + // Restrict the test to 8GiB by default. + // Be careful we don't overflow rlim - if we would, this is a no-op + // and we can just do nothing. + const int64_t lim = static_cast(8) * 1024 * 1024 * 1024; + if (lim > std::numeric_limits::max()) return; + const rlim_t kMaxMem = lim; + + struct rlimit rlim; + if (getrlimit(USE_RESOURCE, &rlim) == 0) { + if (rlim.rlim_cur == RLIM_INFINITY || rlim.rlim_cur > kMaxMem) { + rlim.rlim_cur = kMaxMem; + setrlimit(USE_RESOURCE, &rlim); // ignore result + } + } +} + +namespace tcmalloc { + +std::string GetStatsInPbTxt() { + const int buffer_length = 1500 * 1000; + std::string buf; + buf.resize(buffer_length); + int actual_size = + MallocExtension_Internal_GetStatsInPbtxt(&buf[0], buffer_length); + buf.resize(actual_size); + return buf; +} + +} // namespace tcmalloc diff --git a/tcmalloc/testing/testutil.h b/tcmalloc/testing/testutil.h new file mode 100644 index 000000000..c02a580c6 --- /dev/null +++ b/tcmalloc/testing/testutil.h @@ -0,0 +1,88 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_TESTING_TESTUTIL_H_ +#define TCMALLOC_TESTING_TESTUTIL_H_ + +#include "tcmalloc/malloc_extension.h" + +// Run a function in a thread of its own and wait for it to finish. 
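A short usage sketch for the helpers defined in testutil.cc above (the worker and driver below are hypothetical): each worker runs in its own thread and therefore exercises its own per-thread allocator state, which is why these wrappers exist.

```c++
// Touch the allocator from a dedicated thread so that thread builds and then
// tears down its own cache.
static void AllocateABit(int thread_id) {
  void* p = ::operator new(64 + thread_id);
  ::operator delete(p);
}

static void ExerciseManyThreadCaches() {
  // Eight workers, 256 KiB of stack each; RunManyThreadsWithId joins them
  // all before returning.
  RunManyThreadsWithId(&AllocateABit, 8, 256 * 1024);
}
```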
+// The function you pass in must have the signature +// void MyFunction(); +extern "C" void RunThread(void (*fn)()); + +// Run a function X times, in X threads, and wait for them all to finish. +// The function you pass in must have the signature +// void MyFunction(); +extern "C" void RunManyThreads(void (*fn)(), int count); + +// The 'advanced' version: run a function X times, in X threads, and +// wait for them all to finish. Give them all the specified stack-size. +// (If you're curious why this takes a stacksize and the others don't, +// it's because the one client of this fn wanted to specify stacksize. :-) ) +// The function you pass in must have the signature +// void MyFunction(int idx); +// where idx is the index of the thread (which of the X threads this is). +extern "C" void RunManyThreadsWithId(void (*fn)(int), int count, int stacksize); + +// When compiled 64-bit and run on systems with swap several unittests will end +// up trying to consume all of RAM+swap, and that can take quite some time. By +// limiting the address-space size we get sufficient coverage without blowing +// out job limits. +void SetTestResourceLimit(); + +namespace tcmalloc { + +// Get the TCMalloc stats in textproto format. +std::string GetStatsInPbTxt(); +extern "C" ABSL_ATTRIBUTE_WEAK int MallocExtension_Internal_GetStatsInPbtxt( + char *buffer, int buffer_length); + +class ScopedProfileSamplingRate { + public: + explicit ScopedProfileSamplingRate(int64_t temporary_value) + : previous_(MallocExtension::GetProfileSamplingRate()) { + MallocExtension::SetProfileSamplingRate(temporary_value); + // Reset the per-thread sampler. It may have a very large gap if sampling + // had been disabled. + ::operator delete(::operator new(256 * 1024 * 1024)); + } + + ~ScopedProfileSamplingRate() { + MallocExtension::SetProfileSamplingRate(previous_); + ::operator delete(::operator new(256 * 1024 * 1024)); + } + + private: + int64_t previous_; +}; + +class ScopedGuardedSamplingRate { + public: + explicit ScopedGuardedSamplingRate(int64_t temporary_value) + : previous_(MallocExtension::GetGuardedSamplingRate()) { + MallocExtension::SetGuardedSamplingRate(temporary_value); + } + + ~ScopedGuardedSamplingRate() { + MallocExtension::SetGuardedSamplingRate(previous_); + } + + private: + int64_t previous_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_TESTING_TESTUTIL_H_ diff --git a/tcmalloc/testing/thread_ctor_unittest_lib.cc b/tcmalloc/testing/thread_ctor_unittest_lib.cc new file mode 100644 index 000000000..da454ab8f --- /dev/null +++ b/tcmalloc/testing/thread_ctor_unittest_lib.cc @@ -0,0 +1,64 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test for crashes in tcmalloc during shared library initialization +// http://b/3485510. 
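The scoped helpers declared in testutil.h above follow the usual RAII pattern: construct to override a sampling rate, destruct to restore it. A hypothetical test sketch, assuming gtest and testutil.h are included as the regression test does:

```c++
TEST(ExampleTest, DeterministicAllocationSizes) {
  // Turn off heap sampling and guarded sampling for the scope of this test
  // so allocation sizes are not perturbed; both settings are restored when
  // the guards are destroyed at the closing brace.
  tcmalloc::ScopedProfileSamplingRate no_sampling(0);
  tcmalloc::ScopedGuardedSamplingRate no_guarded(-1);

  void* p = ::operator new(64);
  ::operator delete(p);
}
```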
+ +#include +#include +#include +#include + +namespace { + +struct Foo { + int x; + Foo() : x(42) { } +}; + +static void *fn(void *) +{ + while (true) { + std::vector v; + v.reserve(1000); + for (int i = 0; i < 1000; ++i) { + v.push_back(new Foo); + } + for (int i = 0; i < 1000; ++i) { + assert(v[i]->x == 42); + delete v[i]; + } + } + return nullptr; +} + +#ifndef NTHR +#define NTHR 10 +#endif + +static pthread_t Init() { + pthread_t tid[NTHR]; + for (uintptr_t i = 0; i < NTHR; ++i) { + pthread_create(&tid[i], nullptr, fn, (void *)i); + } + return tid[0]; +} + +pthread_t ignored_init_result = Init(); + +} // namespace + +// This is used to pull in this object from archive +// (when built with --dynamic_mode=off). +pthread_t *Func() { return &ignored_init_result; } diff --git a/tcmalloc/testing/thread_ctor_unittest_main.cc b/tcmalloc/testing/thread_ctor_unittest_main.cc new file mode 100644 index 000000000..9914cad50 --- /dev/null +++ b/tcmalloc/testing/thread_ctor_unittest_main.cc @@ -0,0 +1,24 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test for crashes in tcmalloc during shared library initialization +// http://b/3485510. + +#include + +extern pthread_t *Func(); + +int main() { + Func(); +} diff --git a/tcmalloc/thread_cache.cc b/tcmalloc/thread_cache.cc new file mode 100644 index 000000000..577780b93 --- /dev/null +++ b/tcmalloc/thread_cache.cc @@ -0,0 +1,414 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/thread_cache.h" + +#include + +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "tcmalloc/transfer_cache.h" + +namespace tcmalloc { + +size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; +size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; +int64_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; +ThreadCache* ThreadCache::thread_heaps_ = nullptr; +int ThreadCache::thread_heap_count_ = 0; +ThreadCache* ThreadCache::next_memory_steal_ = nullptr; +#ifdef ABSL_HAVE_TLS +__thread ThreadCache* ThreadCache::thread_local_data_ + ABSL_ATTRIBUTE_INITIAL_EXEC = nullptr; +#endif +ABSL_CONST_INIT bool ThreadCache::tsd_inited_ = false; +pthread_key_t ThreadCache::heap_key_; + +void ThreadCache::Init(pthread_t tid) { + size_ = 0; + + max_size_ = 0; + IncreaseCacheLimitLocked(); + if (max_size_ == 0) { + // There isn't enough memory to go around. 
Just give the minimum to + // this thread. + max_size_ = kMinThreadCacheSize; + + // Take unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kMinThreadCacheSize; + ASSERT(unclaimed_cache_space_ < 0); + } + + next_ = nullptr; + prev_ = nullptr; + tid_ = tid; + in_setspecific_ = false; + for (size_t cl = 0; cl < kNumClasses; ++cl) { + list_[cl].Init(); + } +} + +void ThreadCache::Cleanup() { + // Put unused memory back into central cache + for (int cl = 0; cl < kNumClasses; ++cl) { + if (list_[cl].length() > 0) { + ReleaseToCentralCache(&list_[cl], cl, list_[cl].length()); + } + } +} + +// Remove some objects of class "cl" from central cache and add to thread heap. +// On success, return the first object for immediate use; otherwise return NULL. +void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) { + FreeList* list = &list_[cl]; + ASSERT(list->empty()); + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + + const int num_to_move = std::min(list->max_length(), batch_size); + void* batch[kMaxObjectsToMove]; + int fetch_count = + Static::transfer_cache()[cl].RemoveRange(batch, num_to_move); + if (fetch_count == 0) { + return nullptr; + } + + if (--fetch_count > 0) { + size_ += byte_size * fetch_count; + list->PushBatch(fetch_count, batch + 1); + } + + // Increase max length slowly up to batch_size. After that, + // increase by batch_size in one shot so that the length is a + // multiple of batch_size. + if (list->max_length() < batch_size) { + list->set_max_length(list->max_length() + 1); + } else { + // Don't let the list get too long. In 32 bit builds, the length + // is represented by a 16 bit int, so we need to watch out for + // integer overflow. + int new_length = std::min(list->max_length() + batch_size, + kMaxDynamicFreeListLength); + // The list's max_length must always be a multiple of batch_size, + // and kMaxDynamicFreeListLength is not necessarily a multiple + // of batch_size. + new_length -= new_length % batch_size; + ASSERT(new_length % batch_size == 0); + list->set_max_length(new_length); + } + return batch[0]; +} + +void ThreadCache::ListTooLong(FreeList* list, size_t cl) { + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + ReleaseToCentralCache(list, cl, batch_size); + + // If the list is too long, we need to transfer some number of + // objects to the central cache. Ideally, we would transfer + // num_objects_to_move, so the code below tries to make max_length + // converge on num_objects_to_move. + + if (list->max_length() < batch_size) { + // Slow start the max_length so we don't overreserve. + list->set_max_length(list->max_length() + 1); + } else if (list->max_length() > batch_size) { + // If we consistently go over max_length, shrink max_length. If we don't + // shrink it, some amount of memory will always stay in this freelist. + list->set_length_overages(list->length_overages() + 1); + if (list->length_overages() > kMaxOverages) { + ASSERT(list->max_length() > batch_size); + list->set_max_length(list->max_length() - batch_size); + list->set_length_overages(0); + } + } +} + +// Remove some objects of class "cl" from thread heap and add to central cache +void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) { + ASSERT(src == &list_[cl]); + if (N > src->length()) N = src->length(); + size_t delta_bytes = N * Static::sizemap()->class_to_size(cl); + + // We return prepackaged chains of the correct size to the central cache. 
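The max_length growth rule in FetchFromCentralCache above can be restated in isolation (an illustrative model of the same arithmetic, not a refactoring):

```c++
#include <algorithm>

// Growth step applied after each successful fetch: grow by one object until
// batch_size is reached, then jump in batch_size multiples, capped at
// max_dynamic_length rounded down to a multiple of batch_size.
int NextMaxLength(int max_length, int batch_size, int max_dynamic_length) {
  if (max_length < batch_size) {
    return max_length + 1;  // slow start
  }
  int new_length = std::min(max_length + batch_size, max_dynamic_length);
  new_length -= new_length % batch_size;  // keep it a multiple of batch_size
  return new_length;
}
```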
+ void* batch[kMaxObjectsToMove]; + int batch_size = Static::sizemap()->num_objects_to_move(cl); + while (N > batch_size) { + src->PopBatch(batch_size, batch); + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache()[cl].InsertRange(absl::Span(batch), + batch_size); + N -= batch_size; + } + src->PopBatch(N, batch); + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache()[cl].InsertRange(absl::Span(batch), N); + size_ -= delta_bytes; +} + +// Release idle memory to the central cache +void ThreadCache::Scavenge() { + // If the low-water mark for the free list is L, it means we would + // not have had to allocate anything from the central cache even if + // we had reduced the free list size by L. We aim to get closer to + // that situation by dropping L/2 nodes from the free list. This + // may not release much memory, but if so we will call scavenge again + // pretty soon and the low-water marks will be high on that call. + for (int cl = 0; cl < kNumClasses; cl++) { + FreeList* list = &list_[cl]; + const int lowmark = list->lowwatermark(); + if (lowmark > 0) { + const int drop = (lowmark > 1) ? lowmark/2 : 1; + ReleaseToCentralCache(list, cl, drop); + + // Shrink the max length if it isn't used. Only shrink down to + // batch_size -- if the thread was active enough to get the max_length + // above batch_size, it will likely be that active again. If + // max_length shinks below batch_size, the thread will have to + // go through the slow-start behavior again. The slow-start is useful + // mainly for threads that stay relatively idle for their entire + // lifetime. + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + if (list->max_length() > batch_size) { + list->set_max_length( + std::max(list->max_length() - batch_size, batch_size)); + } + } + list->clear_lowwatermark(); + } + + IncreaseCacheLimit(); +} + +void ThreadCache::DeallocateSlow(void* ptr, FreeList* list, size_t cl) { + tracking::Report(kFreeMiss, cl, 1); + if (ABSL_PREDICT_FALSE(list->length() > list->max_length())) { + tracking::Report(kFreeTruncations, cl, 1); + ListTooLong(list, cl); + } + if (size_ >= max_size_) { + tracking::Report(kFreeScavenges, cl, 1); + Scavenge(); + } +} + +void ThreadCache::IncreaseCacheLimit() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + IncreaseCacheLimitLocked(); +} + +void ThreadCache::IncreaseCacheLimitLocked() { + if (unclaimed_cache_space_ > 0) { + // Possibly make unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kStealAmount; + max_size_ += kStealAmount; + return; + } + // Don't hold pageheap_lock too long. Try to steal from 10 other + // threads before giving up. The i < 10 condition also prevents an + // infinite loop in case none of the existing thread heaps are + // suitable places to steal from. + for (int i = 0; i < 10; + ++i, next_memory_steal_ = next_memory_steal_->next_) { + // Reached the end of the linked list. Start at the beginning. 
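The low-water-mark policy in Scavenge above boils down to a single expression; here it is restated with the boundary cases spelled out (an illustration, not the code itself):

```c++
// If the free list never dipped below L objects since the last scavenge,
// at least L of them were never needed, so roughly half are returned now.
constexpr int ScavengeDropCount(int lowwatermark) {
  return lowwatermark > 1 ? lowwatermark / 2 : (lowwatermark == 1 ? 1 : 0);
}
static_assert(ScavengeDropCount(9) == 4, "drop roughly half of the idle objects");
static_assert(ScavengeDropCount(1) == 1, "drop at least one when any were idle");
static_assert(ScavengeDropCount(0) == 0, "nothing was idle, nothing to drop");
```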
+ if (next_memory_steal_ == nullptr) { + ASSERT(thread_heaps_ != nullptr); + next_memory_steal_ = thread_heaps_; + } + if (next_memory_steal_ == this || + next_memory_steal_->max_size_ <= kMinThreadCacheSize) { + continue; + } + next_memory_steal_->max_size_ -= kStealAmount; + max_size_ += kStealAmount; + + next_memory_steal_ = next_memory_steal_->next_; + return; + } +} + +void ThreadCache::InitTSD() { + ASSERT(!tsd_inited_); + pthread_key_create(&heap_key_, DestroyThreadCache); + tsd_inited_ = true; +} + +ThreadCache* ThreadCache::CreateCacheIfNecessary() { + // Initialize per-thread data if necessary + Static::InitIfNecessary(); + ThreadCache* heap = nullptr; + +#ifdef ABSL_HAVE_TLS + const bool maybe_reentrant = !tsd_inited_; + // If we have set up our TLS, we can avoid a scan of the thread_heaps_ list. + if (tsd_inited_) { + if (thread_local_data_) { + return thread_local_data_; + } + } +#else + const bool maybe_reentrant = true; +#endif + + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + const pthread_t me = pthread_self(); + + // This may be a recursive malloc call from pthread_setspecific() + // In that case, the heap for this thread has already been created + // and added to the linked list. So we search for that first. + if (maybe_reentrant) { + for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) { + if (h->tid_ == me) { + heap = h; + break; + } + } + } + + if (heap == nullptr) { + heap = NewHeap(me); + } + } + + // We call pthread_setspecific() outside the lock because it may + // call malloc() recursively. We check for the recursive call using + // the "in_setspecific_" flag so that we can avoid calling + // pthread_setspecific() if we are already inside pthread_setspecific(). + if (!heap->in_setspecific_ && tsd_inited_) { + heap->in_setspecific_ = true; +#ifdef ABSL_HAVE_TLS + // Also keep a copy in __thread for faster retrieval + thread_local_data_ = heap; +#endif + pthread_setspecific(heap_key_, heap); + heap->in_setspecific_ = false; + } + return heap; +} + +ThreadCache* ThreadCache::NewHeap(pthread_t tid) { + // Create the heap and add it to the linked list + ThreadCache *heap = Static::threadcache_allocator()->New(); + heap->Init(tid); + heap->next_ = thread_heaps_; + heap->prev_ = nullptr; + if (thread_heaps_ != nullptr) { + thread_heaps_->prev_ = heap; + } else { + // This is the only thread heap at the momment. + ASSERT(next_memory_steal_ == nullptr); + next_memory_steal_ = heap; + } + thread_heaps_ = heap; + thread_heap_count_++; + return heap; +} + +void ThreadCache::BecomeIdle() { + if (!tsd_inited_) return; // No caches yet + ThreadCache* heap = GetCacheIfPresent(); + if (heap == nullptr) return; // No thread cache to remove + if (heap->in_setspecific_) return; // Do not disturb the active caller + + heap->in_setspecific_ = true; + pthread_setspecific(heap_key_, nullptr); +#ifdef ABSL_HAVE_TLS + // Also update the copy in __thread + thread_local_data_ = nullptr; +#endif + heap->in_setspecific_ = false; + if (GetCacheIfPresent() == heap) { + // Somehow heap got reinstated by a recursive call to malloc + // from pthread_setspecific. We give up in this case. + return; + } + + // We can now get rid of the heap + DeleteCache(heap); +} + +void ThreadCache::DestroyThreadCache(void* ptr) { + // Note that "ptr" cannot be NULL since pthread promises not + // to invoke the destructor on NULL values, but for safety, + // we check anyway. 
+ if (ptr != nullptr) { +#ifdef ABSL_HAVE_TLS + thread_local_data_ = nullptr; +#endif + DeleteCache(reinterpret_cast(ptr)); + } +} + +void ThreadCache::DeleteCache(ThreadCache* heap) { + // Remove all memory from heap + heap->Cleanup(); + + // Remove from linked list + absl::base_internal::SpinLockHolder h(&pageheap_lock); + if (heap->next_ != nullptr) heap->next_->prev_ = heap->prev_; + if (heap->prev_ != nullptr) heap->prev_->next_ = heap->next_; + if (thread_heaps_ == heap) thread_heaps_ = heap->next_; + thread_heap_count_--; + + if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_; + if (next_memory_steal_ == nullptr) next_memory_steal_ = thread_heaps_; + unclaimed_cache_space_ += heap->max_size_; + + Static::threadcache_allocator()->Delete(heap); +} + +void ThreadCache::RecomputePerThreadCacheSize() { + // Divide available space across threads + int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1; + size_t space = overall_thread_cache_size_ / n; + + // Limit to allowed range + if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; + if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; + + double ratio = space / std::max(1, per_thread_cache_size_); + size_t claimed = 0; + for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) { + // Increasing the total cache size should not circumvent the + // slow-start growth of max_size_. + if (ratio < 1.0) { + h->max_size_ *= ratio; + } + claimed += h->max_size_; + } + unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; + per_thread_cache_size_ = space; +} + +void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { + for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) { + *total_bytes += h->Size(); + if (class_count) { + for (int cl = 0; cl < kNumClasses; ++cl) { + class_count[cl] += h->freelist_length(cl); + } + } + } +} + +void ThreadCache::set_overall_thread_cache_size(size_t new_size) { + // Clip the value to a reasonable minimum + if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; + overall_thread_cache_size_ = new_size; + + RecomputePerThreadCacheSize(); +} + +} // namespace tcmalloc diff --git a/tcmalloc/thread_cache.h b/tcmalloc/thread_cache.h new file mode 100644 index 000000000..5b63c06fc --- /dev/null +++ b/tcmalloc/thread_cache.h @@ -0,0 +1,349 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
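RecomputePerThreadCacheSize above divides the overall budget evenly across live caches and clamps the result. The arithmetic, restated on its own (the min and max parameters stand in for the constants defined in tcmalloc/common.h):

```c++
#include <algorithm>
#include <cstddef>

// Per-thread budget: overall size divided by the number of live caches,
// clamped so a single thread can neither starve nor hoard the budget.
size_t PerThreadBudget(size_t overall_bytes, int thread_count,
                       size_t min_size, size_t max_size) {
  const int n = thread_count > 0 ? thread_count : 1;
  size_t space = overall_bytes / n;
  space = std::max(space, min_size);
  space = std::min(space, max_size);
  return space;
}
```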
+ +#ifndef TCMALLOC_THREAD_CACHE_H_ +#define TCMALLOC_THREAD_CACHE_H_ + +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/tracking.h" + +namespace tcmalloc { + +//------------------------------------------------------------------- +// Data kept per thread +//------------------------------------------------------------------- + +class ThreadCache { + public: + void Init(pthread_t tid) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + void Cleanup(); + + // Accessors (mostly just for printing stats) + int freelist_length(size_t cl) const { return list_[cl].length(); } + + // Total byte size in cache + size_t Size() const { return size_; } + + // Allocate an object of the given size class. When allocation fails + // (from this cache and after running FetchFromCentralCache), + // OOMHandler(size) is called and its return value is + // returned from Allocate. OOMHandler is used to parameterize + // out-of-memory handling (raising exception, returning nullptr, + // calling new_handler or anything else). "Passing" OOMHandler in + // this way allows Allocate to be used in tail-call position in + // fast-path, making allocate tail-call slow path code. + template + void* Allocate(size_t cl); + + void Deallocate(void* ptr, size_t cl); + + void Scavenge(); + + Sampler* GetSampler(); + + static void InitTSD(); + static ThreadCache* GetCache(); + static ThreadCache* GetCacheIfPresent(); + static ThreadCache* CreateCacheIfNecessary(); + static void BecomeIdle(); + + // returns stats on total thread caches created/used + static inline AllocatorStats HeapStats() + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Adds to *total_bytes the total number of bytes used by all thread heaps. + // Also, if class_count is not NULL, it must be an array of size kNumClasses, + // and this function will increment each element of class_count by the number + // of items in all thread-local freelists of the corresponding size class. + static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Sets the total thread cache size to new_size, recomputing the + // individual thread cache sizes as necessary. + static void set_overall_thread_cache_size(size_t new_size) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + static size_t overall_thread_cache_size() + SHARED_LOCKS_REQUIRED(pageheap_lock) { + return overall_thread_cache_size_; + } + + template + void* ABSL_ATTRIBUTE_NOINLINE AllocateSlow(size_t cl, size_t allocated_size) { + tracking::Report(kMallocMiss, cl, 1); + void* ret = FetchFromCentralCache(cl, allocated_size); + if (ABSL_PREDICT_TRUE(ret != nullptr)) { + return ret; + } + return OOMHandler(allocated_size); + } + + private: + // We inherit rather than include the list as a data structure to reduce + // compiler padding. Without inheritance, the compiler pads the list + // structure and then adds it as a member, even though we could fit everything + // without padding. + class FreeList : public LinkedList { + private: + uint32_t lowater_; // Low water mark for list length. + uint32_t max_length_; // Dynamic max list length based on usage. 
+ // Tracks the number of times a deallocation has caused + // length_ > max_length_. After the kMaxOverages'th time, max_length_ + // shrinks and length_overages_ is reset to zero. + uint32_t length_overages_; + + // This extra unused field pads FreeList size to 32 bytes on 64 + // bit machines, helping compiler generate faster code for + // indexing array of lists. + void* ABSL_ATTRIBUTE_UNUSED extra_; + + public: + void Init() { + LinkedList::Init(); + lowater_ = 0; + max_length_ = 1; + length_overages_ = 0; + } + + // Return the maximum length of the list. + size_t max_length() const { + return max_length_; + } + + // Set the maximum length of the list. If 'new_max' > length(), the + // client is responsible for removing objects from the list. + void set_max_length(size_t new_max) { + max_length_ = new_max; + } + + // Return the number of times that length() has gone over max_length(). + size_t length_overages() const { + return length_overages_; + } + + void set_length_overages(size_t new_count) { + length_overages_ = new_count; + } + + // Low-water mark management + int lowwatermark() const { return lowater_; } + void clear_lowwatermark() { lowater_ = length(); } + + ABSL_ATTRIBUTE_ALWAYS_INLINE bool TryPop(void** ret) { + bool out = LinkedList::TryPop(ret); + if (ABSL_PREDICT_TRUE(out) && ABSL_PREDICT_FALSE(length() < lowater_)) { + lowater_ = length(); + } + return out; + } + + void PopBatch(int N, void** batch) { + LinkedList::PopBatch(N, batch); + if (length() < lowater_) lowater_ = length(); + } + }; + +// we've deliberately introduced unused extra_ field into FreeList +// to pad the size. Lets ensure that it is still working as +// intended. +#ifdef _LP64 + static_assert(sizeof(FreeList) == 32, "Freelist size has changed"); +#endif + + // Gets and returns an object from the central cache, and, if possible, + // also adds some objects of that size class to this thread cache. + void* FetchFromCentralCache(size_t cl, size_t byte_size); + + // Releases some number of items from src. Adjusts the list's max_length + // to eventually converge on num_objects_to_move(cl). + void ListTooLong(FreeList* list, size_t cl); + + void DeallocateSlow(void* ptr, FreeList* list, size_t cl); + + // Releases N items from this thread cache. + void ReleaseToCentralCache(FreeList* src, size_t cl, int N); + + // Increase max_size_ by reducing unclaimed_cache_space_ or by + // reducing the max_size_ of some other thread. In both cases, + // the delta is kStealAmount. + void IncreaseCacheLimit(); + + // Same as above but called with pageheap_lock held. + void IncreaseCacheLimitLocked() EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // If TLS is available, we also store a copy of the per-thread object + // in a __thread variable since __thread variables are faster to read + // than pthread_getspecific(). We still need pthread_setspecific() + // because __thread variables provide no way to run cleanup code when + // a thread is destroyed. + // + // We also give a hint to the compiler to use the "initial exec" TLS + // model. This is faster than the default TLS model, at the cost that + // you cannot dlopen this library. (To see the difference, look at + // the CPU use of __tls_get_addr with and without this attribute.) + // + // Since using dlopen on a malloc replacement is asking for trouble in any + // case, that's a good tradeoff for us. +#ifdef ABSL_HAVE_TLS + static __thread ThreadCache* thread_local_data_ ABSL_ATTRIBUTE_INITIAL_EXEC; +#endif + + // Thread-specific key. 
Initialization here is somewhat tricky + // because some Linux startup code invokes malloc() before it + // is in a good enough state to handle pthread_keycreate(). + // Therefore, we use TSD keys only after tsd_inited is set to true. + // Until then, we use a slow path to get the heap object. + static bool tsd_inited_; + static pthread_key_t heap_key_; + + // Linked list of heap objects. + static ThreadCache* thread_heaps_ GUARDED_BY(pageheap_lock); + static int thread_heap_count_ GUARDED_BY(pageheap_lock); + + // A pointer to one of the objects in thread_heaps_. Represents + // the next ThreadCache from which a thread over its max_size_ should + // steal memory limit. Round-robin through all of the objects in + // thread_heaps_. + static ThreadCache* next_memory_steal_ GUARDED_BY(pageheap_lock); + + // Overall thread cache size. + static size_t overall_thread_cache_size_ GUARDED_BY(pageheap_lock); + + // Global per-thread cache size. + static size_t per_thread_cache_size_ GUARDED_BY(pageheap_lock); + + // Represents overall_thread_cache_size_ minus the sum of max_size_ + // across all ThreadCaches. We use int64_t even in 32-bit builds because + // with enough ThreadCaches, this number can get smaller than -2^31. + static int64_t unclaimed_cache_space_ GUARDED_BY(pageheap_lock); + + // This class is laid out with the most frequently used fields + // first so that hot elements are placed on the same cache line. + + FreeList list_[kNumClasses]; // Array indexed by size-class + + size_t size_; // Combined size of data + size_t max_size_; // size_ > max_size_ --> Scavenge() + +#ifndef ABSL_HAVE_TLS + // We sample allocations, biased by the size of the allocation. + // If we have TLS, then we use sampler defined in tcmalloc.cc. + Sampler sampler_; +#endif + + pthread_t tid_; + bool in_setspecific_; + + // Allocate a new heap. + static ThreadCache* NewHeap(pthread_t tid) + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Use only as pthread thread-specific destructor function. + static void DestroyThreadCache(void* ptr); + + static void DeleteCache(ThreadCache* heap); + static void RecomputePerThreadCacheSize() + EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + public: + // All ThreadCache objects are kept in a linked list (for stats collection) + ThreadCache* next_; + ThreadCache* prev_; + + private: +#ifdef ABSL_CACHELINE_SIZE + // Ensure that two instances of this class are never on the same cache line. + // This is critical for performance, as false sharing would negate many of + // the benefits of a per-thread cache. 
+ char padding_[ABSL_CACHELINE_SIZE]; +#endif +}; + +inline AllocatorStats ThreadCache::HeapStats() { + return Static::threadcache_allocator()->stats(); +} + +#ifndef ABSL_HAVE_TLS +inline Sampler* ThreadCache::GetSampler() { return &sampler_; } +#endif + +template +inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE ThreadCache::Allocate(size_t cl) { + const size_t allocated_size = Static::sizemap()->class_to_size(cl); + + FreeList* list = &list_[cl]; + void* ret; + if (ABSL_PREDICT_TRUE(list->TryPop(&ret))) { + tracking::Report(kMallocHit, cl, 1); + size_ -= allocated_size; + return ret; + } + + return AllocateSlow(cl, allocated_size); +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE ThreadCache::Deallocate(void* ptr, + size_t cl) { + FreeList* list = &list_[cl]; + size_ += Static::sizemap()->class_to_size(cl); + ssize_t size_headroom = max_size_ - size_ - 1; + + list->Push(ptr); + ssize_t list_headroom = + static_cast(list->max_length()) - list->length(); + + // There are two relatively uncommon things that require further work. + // In the common case we're done, and in that case we need a single branch + // because of the bitwise-or trick that follows. + if ((list_headroom | size_headroom) < 0) { + DeallocateSlow(ptr, list, cl); + } else { + tracking::Report(kFreeHit, cl, 1); + } +} + +inline ThreadCache* ABSL_ATTRIBUTE_ALWAYS_INLINE +ThreadCache::GetCacheIfPresent() { +#ifdef ABSL_HAVE_TLS + // __thread is faster + return thread_local_data_; +#else + return tsd_inited_ + ? reinterpret_cast(pthread_getspecific(heap_key_)) + : nullptr; +#endif +} + +inline ThreadCache* ThreadCache::GetCache() { + ThreadCache* tc = GetCacheIfPresent(); + return (ABSL_PREDICT_TRUE(tc != nullptr)) ? tc : CreateCacheIfNecessary(); +} + +} // namespace tcmalloc + +#endif // TCMALLOC_THREAD_CACHE_H_ diff --git a/tcmalloc/tracking.h b/tcmalloc/tracking.h new file mode 100644 index 000000000..465920e39 --- /dev/null +++ b/tcmalloc/tracking.h @@ -0,0 +1,110 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_TRACKING_H_ +#define TCMALLOC_TRACKING_H_ +// Optional support for tracking various stats in tcmalloc. For each +// sizeclass, we track: +// * # of mallocs +// * ...that hit the fast path +// * # of frees +// * ...that hit the fast path +// +// both on each CPU and on each thread. +// +// If disabled (TCMALLOC_TRACK_ALLOCS not defined), it has no runtime cost in +// time or space. +// +// If enabled and an implementation provided, we issue calls to record various +// statistics about cache hit rates. + +#include +#include + +#include +#include + +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/internal/spinlock.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/malloc_extension.h" + +// Uncomment here or pass --copt=-DTCMALLOC_TRACK_ALLOCS at build time if you +// want tracking. 
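The single-branch check in ThreadCache::Deallocate above relies on the sign bit of a bitwise OR: the result is negative exactly when at least one operand is negative, so one comparison covers both the list-length and byte-budget headrooms. A self-contained restatement of the trick:

```c++
// Stand-in for ssize_t so the sketch needs no platform headers.
using SignedSize = long long;

constexpr bool EitherHeadroomExhausted(SignedSize list_headroom,
                                       SignedSize size_headroom) {
  // The sign bit of (a | b) is set iff the sign bit of a or of b is set.
  return (list_headroom | size_headroom) < 0;
}
static_assert(EitherHeadroomExhausted(-1, 5), "list over its max length");
static_assert(EitherHeadroomExhausted(7, -3), "cache over its byte budget");
static_assert(!EitherHeadroomExhausted(7, 5), "fast path: both have room");
```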
+#ifndef TCMALLOC_TRACK_ALLOCS
+// #define TCMALLOC_TRACK_ALLOCS
+#endif
+namespace tcmalloc {
+
+#if 1
+#define TCMALLOC_HAVE_TRACKING 0
+#endif
+
+// We track various kinds of events on each thread and each cpu.  Each
+// event is broken down by the sizeclass where it happened.
+// To track a new event, add an enum value here, insert calls to
+// tracking::Report() where the event occurs, and add a printable name
+// for the event to kTrackingStatNames (in tracking.cc).  Optionally,
+// print the stat somehow in State::Print.
+enum TrackingStat {
+  kMallocHit = 0,        // malloc that took the fast path
+  kMallocMiss = 1,       // malloc that didn't
+  kFreeHit = 2,          // ibid. for free
+  kFreeMiss = 3,
+  kFreeScavenges = 4,    // # of frees that lead to a scavenge
+  kFreeTruncations = 5,  // # of frees that lead to a list truncation
+  kTCInsertHit = 6,   // # of times the returned object list hits the transfer cache.
+  kTCInsertMiss = 7,  // # of times the object list misses the transfer cache.
+  kTCRemoveHit = 8,   // # of times object list fetching hits the transfer cache.
+  kTCRemoveMiss = 9,  // # of times object list fetching misses the transfer cache.
+  kNumTrackingStats = 10,
+};
+
+namespace tracking {
+
+// Report occurrences of <stat> associated with sizeclass <cl>.
+void Report(TrackingStat stat, size_t cl, ssize_t count);
+
+// Dump all tracking data to <out>.  We could support various other
+// mechanisms for data delivery without too much trouble...
+void Print(TCMalloc_Printer *out);
+
+// Call before a thread dies (ideally after its last malloc call!)
+// so we don't lose its statistics.
+void ReportThreadDeath();
+
+// Call on startup during tcmalloc initialization.
+void Init();
+
+// Fill <result> with information for each stat type (broken down by
+// sizeclass if level == kDetailed).
+void GetProperties(std::map<std::string, MallocExtension::Property>* result);
+
+#if !TCMALLOC_HAVE_TRACKING
+// No tracking: these are all no-ops.
+inline void Report(TrackingStat stat, size_t cl, ssize_t count) {}
+inline void RegisterNewThreadIfNecessary() {}
+inline void Print(TCMalloc_Printer *out) {}
+inline void ReportThreadDeath() {}
+inline void Init() {}
+inline void GetProperties(
+    std::map<std::string, MallocExtension::Property>* result) {}
+#endif
+
+}  // namespace tracking
+}  // namespace tcmalloc
+
+#endif  // TCMALLOC_TRACKING_H_
diff --git a/tcmalloc/transfer_cache.cc b/tcmalloc/transfer_cache.cc
new file mode 100644
index 000000000..3d366d409
--- /dev/null
+++ b/tcmalloc/transfer_cache.cc
@@ -0,0 +1,281 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/transfer_cache.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+
+#include "tcmalloc/common.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/tracking.h"
+
+namespace tcmalloc {
+#ifndef TCMALLOC_SMALL_BUT_SLOW
+void TransferCache::Init(size_t cl) {
+  absl::base_internal::SpinLockHolder h(&lock_);
+  freelist_.Init(cl);
+
+  // We need at least 2 slots to store the list head and tail.
+ ASSERT(kMinObjectsToMove >= 2); + + // Cache this value, for performance. + arbitrary_transfer_ = + IsExperimentActive(Experiment::TCMALLOC_ARBITRARY_TRANSFER); + + slots_ = nullptr; + max_cache_slots_ = 0; + int32_t cache_slots = 0; + + if (cl > 0) { + // Limit the maximum size of the cache based on the size class. If this + // is not done, large size class objects will consume a lot of memory if + // they just sit in the transfer cache. + size_t bytes = Static::sizemap()->class_to_size(cl); + size_t objs_to_move = Static::sizemap()->num_objects_to_move(cl); + ASSERT(objs_to_move > 0 && bytes > 0); + + // Starting point for the maximum number of entries in the transfer cache. + // This actual maximum for a given size class may be lower than this + // maximum value. + max_cache_slots_ = 64 * objs_to_move; + // A transfer cache freelist can have anywhere from 0 to + // max_cache_slots_ slots to put link list chains into. + cache_slots = 16 * objs_to_move; + + // Limit each size class cache to at most 1MB of objects or one entry, + // whichever is greater. Total transfer cache memory used across all + // size classes then can't be greater than approximately + // 1MB * kMaxNumTransferEntries. + max_cache_slots_ = std::min( + max_cache_slots_, + std::max(objs_to_move, (1024 * 1024) / (bytes * objs_to_move) * + objs_to_move)); + cache_slots = std::min(cache_slots, max_cache_slots_); + slots_ = reinterpret_cast( + Static::arena()->Alloc(max_cache_slots_ * sizeof(void *))); + } + used_slots_.store(0, std::memory_order_relaxed); + cache_slots_.store(cache_slots, std::memory_order_relaxed); + ASSERT(cache_slots <= max_cache_slots_); +} + +bool TransferCache::EvictRandomSizeClass(int locked_size_class, bool force) { + static std::atomic race_counter{1}; + int t = race_counter.load(std::memory_order_relaxed); + race_counter.store(t + 1, std::memory_order_relaxed); + if (t >= kNumClasses) { + while (t >= kNumClasses) { + t -= kNumClasses - 1; // Don't want t == 0 + } + race_counter.store(t, std::memory_order_relaxed); + } + ASSERT(t > 0); + ASSERT(t < kNumClasses); + if (t == locked_size_class) return false; + return Static::transfer_cache()[t].ShrinkCache(locked_size_class, force); +} + +bool TransferCache::MakeCacheSpace(int N) { + int32_t used_slots = used_slots_.load(std::memory_order_relaxed); + int32_t cache_slots = cache_slots_.load(std::memory_order_relaxed); + // Is there room in the cache? + if (used_slots + N <= cache_slots) return true; + // Check if we can expand this cache? + if (cache_slots + N > max_cache_slots_) return false; + // Ok, we'll try to grab an entry from some other size class. + if (EvictRandomSizeClass(freelist_.size_class(), false) || + EvictRandomSizeClass(freelist_.size_class(), true)) { + // Succeeded in evicting, we're going to make our cache larger. However, we + // may have dropped and re-acquired the lock in EvictRandomSizeClass (via + // ShrinkCache), so the cache_size may have changed. Therefore, check and + // verify that it is still OK to increase the cache_size. + cache_slots = cache_slots_.load(std::memory_order_relaxed); + if (cache_slots + N <= max_cache_slots_) { + cache_slots += N; + cache_slots_.store(cache_slots, std::memory_order_relaxed); + return true; + } + } + return false; +} + +bool TransferCache::ShrinkCache(int locked_size_class, bool force) { + int32_t used_slots = used_slots_.load(std::memory_order_relaxed); + int32_t cache_slots = cache_slots_.load(std::memory_order_relaxed); + + // Start with a quick check without taking a lock. 
+ if (cache_slots == 0) return false; + + int N = Static::sizemap()->num_objects_to_move(freelist_.size_class()); + + // We don't evict from a full cache unless we are 'forcing'. + if (!force && used_slots + N >= cache_slots) return false; + + // Release the other held lock before acquiring the current lock to avoid a + // dead lock. + struct SpinLockReleaser { + absl::base_internal::SpinLock *lock_; + + SpinLockReleaser(absl::base_internal::SpinLock *lock) : lock_(lock) { + lock_->Unlock(); + } + ~SpinLockReleaser() { lock_->Lock(); } + }; + SpinLockReleaser unlocker(&Static::transfer_cache()[locked_size_class].lock_); + void *to_free[kMaxObjectsToMove]; + int num_to_free; + { + absl::base_internal::SpinLockHolder h(&lock_); + + // Fetch while holding the lock in case they changed. + cache_slots = cache_slots_.load(std::memory_order_relaxed); + used_slots = used_slots_.load(std::memory_order_relaxed); + ASSERT(0 <= used_slots && used_slots <= cache_slots); + + if (cache_slots == 0) return false; + if (!arbitrary_transfer_ && cache_slots < N) return false; + + N = std::min(N, cache_slots); + int unused = cache_slots - used_slots; + if (N <= unused) { + cache_slots -= N; + cache_slots_.store(cache_slots, std::memory_order_relaxed); + return true; + } + if (!force) return false; + + num_to_free = N - unused; + cache_slots -= N; + used_slots -= num_to_free; + cache_slots_.store(cache_slots, std::memory_order_relaxed); + used_slots_.store(used_slots, std::memory_order_relaxed); + // Our internal slot array may get overwritten as soon as we drop the lock, + // so copy the items to free to an on stack buffer. + memcpy(to_free, GetSlot(used_slots), sizeof(void *) * num_to_free); + } + // Access the freelist while holding *neither* lock. + freelist_.InsertRange(to_free, num_to_free); + return true; +} + +void TransferCache::InsertRange(absl::Span batch, int N) { + const int B = Static::sizemap()->num_objects_to_move(freelist_.size_class()); + ASSERT(0 < N && N <= B); + int32_t used_slots = used_slots_.load(std::memory_order_relaxed); + if (N == B && used_slots + N <= max_cache_slots_) { + absl::base_internal::SpinLockHolder h(&lock_); + if (MakeCacheSpace(N)) { + // MakeCacheSpace can drop the lock, so refetch + used_slots = used_slots_.load(std::memory_order_relaxed); + ASSERT(0 <= used_slots && used_slots + N <= max_cache_slots_); + used_slots_.store(used_slots + N, std::memory_order_relaxed); + + void **entry = GetSlot(used_slots); + memcpy(entry, batch.data(), sizeof(void *) * N); + tracking::Report(kTCInsertHit, freelist_.size_class(), 1); + return; + } + } else if (arbitrary_transfer_) { + absl::base_internal::SpinLockHolder h(&lock_); + MakeCacheSpace(N); + // MakeCacheSpace can drop the lock, so refetch + int32_t used_slots = used_slots_.load(std::memory_order_relaxed); + int32_t cache_slots = cache_slots_.load(std::memory_order_relaxed); + int unused = cache_slots - used_slots; + if (N < unused) { + used_slots_.store(used_slots + N, std::memory_order_relaxed); + ASSERT(0 <= used_slots && used_slots + N <= max_cache_slots_); + void **entry = GetSlot(used_slots); + memcpy(entry, batch.data(), sizeof(void *) * N); + tracking::Report(kTCInsertHit, freelist_.size_class(), 1); + return; + } + // We could not fit the entire batch into the transfer cache + // so send the batch to the freelist and also take some elements from + // the transfer cache so that we amortise the cost of accessing spans + // in the freelist. Only do this if caller has sufficient space in + // batch. 
+ // First of all fill up the rest of the batch with elements from the + // transfer cache. + int extra = B - N; + if (N > 1 && extra > 0 && used_slots > 0 && batch.size() >= B) { + // Take at most all the objects present + extra = std::min(extra, used_slots); + ASSERT(extra + N <= kMaxObjectsToMove); + used_slots -= extra; + used_slots_.store(used_slots, std::memory_order_relaxed); + + void **entry = GetSlot(used_slots); + memcpy(batch.data() + N, entry, sizeof(void *) * extra); + N += extra; +#ifndef NDEBUG + int rest = batch.size() - N - 1; + if (rest > 0) { + memset(batch.data() + N, 0x3f, rest * sizeof(void *)); + } +#endif + } + } + tracking::Report(kTCInsertMiss, freelist_.size_class(), 1); + freelist_.InsertRange(batch.data(), N); +} + +int TransferCache::RemoveRange(void **batch, int N) { + ASSERT(N > 0); + const int B = Static::sizemap()->num_objects_to_move(freelist_.size_class()); + int fetch = 0; + int32_t used_slots = used_slots_.load(std::memory_order_relaxed); + if (N == B && used_slots >= N) { + absl::base_internal::SpinLockHolder h(&lock_); + // Refetch with the lock + used_slots = used_slots_.load(std::memory_order_relaxed); + if (used_slots >= N) { + used_slots -= N; + used_slots_.store(used_slots, std::memory_order_relaxed); + ASSERT(0 <= used_slots); + void **entry = GetSlot(used_slots); + memcpy(batch, entry, sizeof(void *) * N); + tracking::Report(kTCRemoveHit, freelist_.size_class(), 1); + return N; + } + } else if (arbitrary_transfer_ && used_slots >= 0) { + absl::base_internal::SpinLockHolder h(&lock_); + // Refetch with the lock + used_slots = used_slots_.load(std::memory_order_relaxed); + + fetch = std::min(N, used_slots); + used_slots -= fetch; + ASSERT(0 <= used_slots); + used_slots_.store(used_slots, std::memory_order_relaxed); + void **entry = GetSlot(used_slots); + memcpy(batch, entry, sizeof(void *) * fetch); + tracking::Report(kTCRemoveHit, freelist_.size_class(), 1); + if (fetch == N) return N; + } + tracking::Report(kTCRemoveMiss, freelist_.size_class(), 1); + return freelist_.RemoveRange(batch + fetch, N - fetch) + fetch; +} + +size_t TransferCache::tc_length() { + return static_cast(used_slots_.load(std::memory_order_relaxed)); +} + +#endif +} // namespace tcmalloc diff --git a/tcmalloc/transfer_cache.h b/tcmalloc/transfer_cache.h new file mode 100644 index 000000000..89a44437a --- /dev/null +++ b/tcmalloc/transfer_cache.h @@ -0,0 +1,157 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef TCMALLOC_TRANSFER_CACHE_H_
+#define TCMALLOC_TRANSFER_CACHE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/types/span.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+
+namespace tcmalloc {
+
+#ifndef TCMALLOC_SMALL_BUT_SLOW
+
+// TransferCache is used to cache transfers of
+// sizemap.num_objects_to_move(size_class) back and forth between
+// thread caches and the central cache for a given size class.
+class TransferCache {
+ public:
+  // A TransferCache may be used before its constructor runs, so we prevent
+  // lock_'s constructor from doing anything to the lock_ state.
+  TransferCache() : lock_(absl::base_internal::kLinkerInitialized) {}
+  TransferCache(const TransferCache &) = delete;
+  TransferCache &operator=(const TransferCache &) = delete;
+
+  void Init(size_t cl) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+  // These methods all do internal locking.
+
+  // Insert the specified batch into the transfer cache.  N is the number of
+  // elements in the range.  RemoveRange() is the opposite operation.
+  void InsertRange(absl::Span<void *> batch, int N) LOCKS_EXCLUDED(lock_);
+
+  // Returns the actual number of fetched elements and stores elements in the
+  // batch.
+  int RemoveRange(void **batch, int N) LOCKS_EXCLUDED(lock_);
+
+  // Returns the number of free objects in the central cache.
+  size_t central_length() { return freelist_.length(); }
+
+  // Returns the number of free objects in the transfer cache.
+  size_t tc_length();
+
+  // Returns the memory overhead (internal fragmentation) attributable
+  // to the freelist.  This is memory lost when the size of elements
+  // in a freelist doesn't exactly divide the page size (an 8192-byte
+  // page full of 5-byte objects would have 2 bytes of memory overhead).
+  size_t OverheadBytes() {
+    return freelist_.OverheadBytes();
+  }
+
+ private:
+  // REQUIRES: lock_ is held.
+  // Tries to make room for a batch.  If the cache is full it will try to
+  // expand it at the cost of some other cache size.  Returns false if there
+  // is no space.
+  bool MakeCacheSpace(int N) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // REQUIRES: lock_ for locked_size_class is held.
+  // Picks a "random" size class to steal slots from.  In reality it just
+  // iterates over the sizeclasses but does so without taking a lock.  Returns
+  // true on success.
+  // May temporarily lock a "random" size class.
+  static bool EvictRandomSizeClass(int locked_size_class, bool force);
+
+  // REQUIRES: lock_ is *not* held.
+  // Tries to shrink the cache.  If force is true it will release objects to
+  // spans if that allows it to shrink the cache.  Returns false if it failed
+  // to shrink the cache.  Decreases cache_slots_ on success.
+  // May temporarily take lock_.  If it takes lock_, the locked_size_class
+  // lock is released to keep the thread from holding two size class locks
+  // concurrently, which could lead to a deadlock.
+  bool ShrinkCache(int locked_size_class, bool force) LOCKS_EXCLUDED(lock_);
+
+  // Returns the first object of the i-th slot.
+  void **GetSlot(size_t i) EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+    return slots_ + i;
+  }
+
+  // This lock protects all the data members.  used_slots_ and cache_slots_
+  // may be looked at without holding the lock.
+  absl::base_internal::SpinLock lock_;
+
+  // Number of currently used cached entries in slots_.  This variable is
+  // updated under a lock but can be read without one.
+ std::atomic used_slots_; + + // Pointer to array of free objects. Use GetSlot() to get pointers to + // entries. + void **slots_ GUARDED_BY(lock_); + + // The current number of slots for this size class. This is an adaptive value + // that is increased if there is lots of traffic on a given size class. This + // variable is updated under a lock but can be read without one. + std::atomic cache_slots_; + + // Maximum size of the cache for a given size class. (immutable after Init()) + int32_t max_cache_slots_; + + CentralFreeList freelist_; + + // Cached value of IsExperimentActive(Experiment::TCMALLOC_ARBITRARY_TRANSFER) + bool arbitrary_transfer_; +} ABSL_CACHELINE_ALIGNED; + +#else + +// For the small memory model, the transfer cache is not used. +class TransferCache { + public: + TransferCache() {} + TransferCache(const TransferCache &) = delete; + TransferCache &operator=(const TransferCache &) = delete; + + void Init(size_t cl) EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + freelist_.Init(cl); + } + + void InsertRange(absl::Span batch, int N) { + freelist_.InsertRange(batch.data(), N); + } + + int RemoveRange(void **batch, int N) { + return freelist_.RemoveRange(batch, N); + } + + size_t central_length() { return freelist_.length(); } + + size_t tc_length() { return 0; } + + size_t OverheadBytes() { return freelist_.OverheadBytes(); } + + private: + CentralFreeList freelist_; +}; + +#endif +} // namespace tcmalloc + +#endif // TCMALLOC_TRANSFER_CACHE_H_ diff --git a/tcmalloc/want_hpaa.cc b/tcmalloc/want_hpaa.cc new file mode 100644 index 000000000..6782c4d87 --- /dev/null +++ b/tcmalloc/want_hpaa.cc @@ -0,0 +1,25 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" + +namespace tcmalloc { + +// This -if linked into a binary - overrides page_allocator.cc and forces HPAA +// on/subrelease off. +ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; } + +ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; } + +} // namespace tcmalloc diff --git a/tcmalloc/want_hpaa_subrelease.cc b/tcmalloc/want_hpaa_subrelease.cc new file mode 100644 index 000000000..fce2ff76a --- /dev/null +++ b/tcmalloc/want_hpaa_subrelease.cc @@ -0,0 +1,25 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" + +namespace tcmalloc { + +// This -if linked into a binary - overrides page_allocator.cc and forces HPAA +// on/subrelease on. 
+ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; } + +ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return 1; } + +} // namespace tcmalloc diff --git a/tcmalloc/want_no_hpaa.cc b/tcmalloc/want_no_hpaa.cc new file mode 100644 index 000000000..9b311a478 --- /dev/null +++ b/tcmalloc/want_no_hpaa.cc @@ -0,0 +1,25 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" + +namespace tcmalloc { + +// This -if linked into a binary - overrides page_allocator.cc and +// forces HPAA off/subrelease off. +ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return -1; } + +ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; } + +} // namespace tcmalloc