Skip to content

Commit

Permalink
Merge pull request #9 from jblazquez/master
Browse files Browse the repository at this point in the history
Add support for ordering object files during merge
  • Loading branch information
tux3 authored Jan 19, 2025
2 parents 09154fc + 5583588 commit badafe3
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 27 deletions.
25 changes: 22 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ FLAGS:
-v, --verbose Print verbose information
OPTIONS:
-k, --keep-symbols <keep-symbols>... Accepts regexes of the symbol names to keep global, and localizes the rest
-r, --remove-symbols <remove-symbols>... Accepts regexes of the symbol names to hide, and keep the rest global
-o, --output <output> Output static library
-k, --keep-symbols <keep-symbols>... Accepts regexes of the symbol names to keep global, and localizes the rest
--order-file <order-file> Order file to control the sorting of merged objects
-o, --output <output> Output static library
-r, --remove-symbols <remove-symbols>... Accepts regexes of the symbol names to hide, and keep the rest global
ARGS:
<INPUTS>... Static libraries to merge
Expand All @@ -54,6 +55,24 @@ You may specify a different objcopy implementation with the `OBJCOPY` env var, a
You can use armerge to handle Linux/Android archives on a macOS host if the right toolchain is installed.
(i.e. you may need to set `LD`, `OBJCOPY`, and `RANLIB` to point to the Android NDK, or to some other toolchain).

## Object merge order

By default, objects are passed to the linker in an unspecified order. Linkers typically lay out the output file's sections in the order the inputs are specified.

If you want to control the order in which some of the object files are merged, you can use the `--order-file` option. With this option, you can specify an order file that controls the relative order in which armerge passes the listed object files to the linker, so you can precisely control the order in which certain sections will be laid out in the output.

The order file is a plain text file with one entry per line, in the format `{INPUT_LIB}@{OBJNAME}`, where `INPUT_LIB` is the name of the input library and `OBJNAME` is the name of the object file to select from that library. Blank lines or lines starting with the `#` sign are ignored. Any object files not listed are placed after all of the listed objects in an unspecified order. For example:

```
# Place the custom malloc object file first
libmalloc.a@malloc.o
# Place the app's entry point next
libapp.a@main.o
# All other object files are placed after this in an unspecified order.
```

## Principle of operation

### Merging static libraries
Expand Down
6 changes: 6 additions & 0 deletions src/archives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,12 @@ pub fn extract_objects<I: IntoParallelIterator<Item = InputLibrary<R>>, R: Read>
})
}

/// Returns the object name encoded in the file name of `path`.
///
/// The last two `.`-separated components of the file name are stripped and
/// everything before them is returned, e.g. `libfoo.a@bar.o.abc123.o`
/// yields `libfoo.a@bar.o`.
///
/// Unlike a plain index into the split parts, this never panics:
/// * a file name with fewer than two dots is returned unchanged;
/// * a path with no final component (such as `..`) is returned as a whole,
///   converted lossily to a string.
pub fn get_object_name_from_path(path: &std::path::Path) -> String {
    let filename = match path.file_name() {
        Some(name) => name.to_string_lossy(),
        None => return path.to_string_lossy().into_owned(),
    };
    // `rsplitn(3, '.')` yields at most three pieces, right to left; the third
    // one (if any) is everything that precedes the last two dots.
    let stem = filename.rsplitn(3, '.').nth(2).map(str::to_owned);
    stem.unwrap_or_else(|| filename.into_owned())
}

pub fn create_index(archive_path: &std::path::Path) -> Result<(), MergeError> {
use std::process::Command;

Expand Down
13 changes: 13 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,26 @@ impl ArMerger {
self,
keep_or_remove: ArmergeKeepOrRemove,
symbols_regexes: Iter,
) -> Result<(), MergeError> {
self.merge_and_localize_ordered(keep_or_remove, symbols_regexes, std::iter::empty())
}

/// Merge the input libraries, honouring a caller-supplied object order, and
/// localize symbols according to `keep_or_remove`.
///
/// `symbols_regexes` holds the symbol name patterns that `keep_or_remove`
/// applies to (symbols to keep exported, or symbols to hide).
/// `object_order` lists object names in the order they should be merged; each
/// name is mapped to its position in the sequence before being handed to
/// `objects::merge`. If a name is listed more than once, its last position
/// is the one recorded.
pub fn merge_and_localize_ordered<Iter: IntoIterator<Item = Regex>>(
    self,
    keep_or_remove: ArmergeKeepOrRemove,
    symbols_regexes: Iter,
    object_order: impl IntoIterator<Item = String>
) -> Result<(), MergeError> {
    let regexes: Vec<Regex> = symbols_regexes.into_iter().collect();

    // Turn the ordered list into a name -> rank lookup table.
    let rank_by_name: std::collections::HashMap<String, usize> = object_order
        .into_iter()
        .enumerate()
        .map(|(rank, name)| (name, rank))
        .collect();

    objects::merge(
        self.builder,
        self.extracted.contents_type,
        self.extracted.object_dir,
        keep_or_remove,
        regexes,
        rank_by_name,
    )
}
}
25 changes: 22 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use armerge::{ArmergeKeepOrRemove, ArMerger};
use regex::Regex;
use std::error::Error;
use std::path::PathBuf;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use structopt::StructOpt;
use tracing::{error, Level};
use tracing_subscriber::filter::Directive;
Expand All @@ -18,6 +20,10 @@ struct Opt {
#[structopt(short, long, number_of_values = 1)]
remove_symbols: Vec<String>,

/// Order file to control the sorting of merged objects
#[structopt(long, parse(from_os_str))]
order_file: Option<PathBuf>,

/// Output static library
#[structopt(short, long, parse(from_os_str))]
output: PathBuf,
Expand Down Expand Up @@ -62,6 +68,11 @@ fn err_main(opt: Opt) -> Result<(), Box<dyn Error>> {
}

let merger = ArMerger::new_from_paths(&opt.inputs, &opt.output)?;
let object_order = if let Some(path) = &opt.order_file {
parse_order_file(path)
} else {
Vec::new()
};

match (opt.keep_symbols.is_empty(), opt.remove_symbols.is_empty()) {
(true, true) => {
Expand All @@ -75,15 +86,15 @@ fn err_main(opt: Opt) -> Result<(), Box<dyn Error>> {
.into_iter()
.map(|s| Regex::new(&s))
.collect::<Result<Vec<_>, _>>()?;
merger.merge_and_localize(ArmergeKeepOrRemove::KeepSymbols, keep_symbols)?;
merger.merge_and_localize_ordered(ArmergeKeepOrRemove::KeepSymbols, keep_symbols, object_order)?;
},
(true, false) => {
let remove_symbols: Vec<Regex> = opt
.remove_symbols
.into_iter()
.map(|s| Regex::new(&s))
.collect::<Result<Vec<_>, _>>()?;
merger.merge_and_localize(ArmergeKeepOrRemove::RemoveSymbols, remove_symbols)?;
merger.merge_and_localize_ordered(ArmergeKeepOrRemove::RemoveSymbols, remove_symbols, object_order)?;
},
(false, false) => {
return Err("Can't have both keep-symbols and remove-symbols options at the same time".to_string().into());
Expand All @@ -92,3 +103,11 @@ fn err_main(opt: Opt) -> Result<(), Box<dyn Error>> {

Ok(())
}

/// Reads the order file at `path` and returns its entries in file order.
///
/// Each line is trimmed of surrounding whitespace; blank lines and lines
/// starting with `#` are skipped.
///
/// # Panics
///
/// Panics if the file cannot be opened or a line cannot be read. The panic
/// message names the offending file and the underlying I/O error, so a bad
/// `--order-file` argument is diagnosable.
fn parse_order_file(path: &Path) -> Vec<String> {
    let file = File::open(path).unwrap_or_else(|err| {
        panic!("failed to open order file `{}`: {}", path.display(), err)
    });

    BufReader::new(file)
        .lines()
        .map(|line| {
            line.unwrap_or_else(|err| {
                panic!("failed to read order file `{}`: {}", path.display(), err)
            })
        })
        .filter_map(|line| {
            // Trim first so that only retained entries are allocated.
            let entry = line.trim();
            if entry.is_empty() || entry.starts_with('#') {
                None
            } else {
                Some(entry.to_string())
            }
        })
        .collect()
}
16 changes: 13 additions & 3 deletions src/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mod builtin_filter;
mod system_filter;

use crate::arbuilder::ArBuilder;
use crate::objects::syms::ObjectSyms;
use crate::archives::get_object_name_from_path;
use crate::{ArchiveContents, ArmergeKeepOrRemove, MergeError};
use regex::Regex;
use std::collections::HashMap;
Expand All @@ -23,7 +23,7 @@ pub fn merge_required_objects(
contents_type: ArchiveContents,
obj_dir: &Path,
merged_path: &Path,
objs: &HashMap<PathBuf, ObjectSyms>,
objs: &[PathBuf],
keep_or_remove: ArmergeKeepOrRemove,
regexes: &[Regex],
) -> Result<(), MergeError> {
Expand Down Expand Up @@ -59,6 +59,7 @@ pub fn merge(
objects: ObjectTempDir,
keep_or_remove: ArmergeKeepOrRemove,
mut regexes: Vec<Regex>,
object_order: HashMap<String, usize>,
) -> Result<(), MergeError> {
let merged_name = "merged.o";
let mut merged_path = objects.dir.path().to_owned();
Expand All @@ -77,6 +78,15 @@ pub fn merge(
return Err(MergeError::NoObjectsLeft);
}

let mut sorted_objects = required_objects.into_keys().collect::<Vec<_>>();
sorted_objects.sort_by(|a, b| {
let name1 = get_object_name_from_path(a);
let name2 = get_object_name_from_path(b);
let i1 = object_order.get(&name1).unwrap_or(&usize::MAX);
let i2 = object_order.get(&name2).unwrap_or(&usize::MAX);
i1.cmp(i2)
});

if keep_or_remove == ArmergeKeepOrRemove::KeepSymbols {
// When filtering symbols to keep just the public API visible,
// we must make an exception for the unwind symbols (if linked statically)
Expand All @@ -88,7 +98,7 @@ pub fn merge(
contents_type,
objects.dir.path(),
&merged_path,
&required_objects,
&sorted_objects,
keep_or_remove,
&regexes,
)?;
Expand Down
6 changes: 2 additions & 4 deletions src/objects/builtin_filter.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
use crate::{ArmergeKeepOrRemove, MergeError};
use regex::Regex;
use std::collections::HashMap;
use std::path::{Path, PathBuf};

use crate::objects::merge;
use crate::objects::syms::ObjectSyms;

pub fn merge_required_objects(
_obj_dir: &Path,
merged_path: &Path,
objects: &HashMap<PathBuf, ObjectSyms>,
objects: &[PathBuf],
keep_or_remove: ArmergeKeepOrRemove,
regexes: &[Regex],
) -> Result<(), MergeError> {
Expand All @@ -18,7 +16,7 @@ pub fn merge_required_objects(
}

// The merging part is still not builtin, it has to be done by a real linker
merge::create_merged_object(merged_path, &[], objects.keys(), false)?;
merge::create_merged_object(merged_path, &[], objects, false)?;

// Filtering the symbols is faster in pure Rust, compared to calling the system's objcopy
let merged_elf = std::fs::read(merged_path)?;
Expand Down
9 changes: 3 additions & 6 deletions src/objects/filter_deps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use rayon::prelude::*;
use regex::Regex;
use tracing::{event_enabled, info, Level};

use crate::archives::get_object_name_from_path;
use crate::objects::syms::ObjectSyms;

fn add_deps_recursive(
Expand Down Expand Up @@ -40,11 +41,9 @@ pub fn filter_required_objects(
for (obj_path, obj) in object_syms.iter() {
if obj.has_exported_symbols {
if event_enabled!(Level::INFO) {
let filename = obj_path.file_name().unwrap().to_string_lossy();
let name_parts = filename.rsplitn(3, '.').collect::<Vec<_>>();
info!(
"Will merge {:?} and its dependencies, as it contains global kept symbols",
name_parts[2],
get_object_name_from_path(obj_path),
);
}
required_objs.insert(obj_path.clone());
Expand All @@ -55,11 +54,9 @@ pub fn filter_required_objects(
if event_enabled!(Level::INFO) {
for obj in object_syms.keys() {
if !required_objs.contains(obj) {
let filename = obj.file_name().unwrap().to_string_lossy();
let name_parts = filename.rsplitn(3, '.').collect::<Vec<_>>();
info!(
"`{}` is not used by any kept objects, it will be skipped",
name_parts[2]
get_object_name_from_path(obj)
)
}
}
Expand Down
15 changes: 7 additions & 8 deletions src/objects/system_filter.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use goblin::{peek_bytes, Hint};
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::ffi::OsString;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str::FromStr;

use crate::objects::merge::create_merged_object;
use crate::objects::syms::ObjectSyms;
use crate::{ArmergeKeepOrRemove, MergeError};
use object::{Object, ObjectSymbol, SymbolKind};
use regex::Regex;
Expand Down Expand Up @@ -144,23 +143,23 @@ fn filter_symbols(object_path: &Path, filter_list_path: &Path) -> Result<(), Mer
/// Merges `objects` into a single filtered Mach-O object at `merged_path`.
///
/// A symbol filter list is first created from `objects`, `keep_or_remove` and
/// `regexes` (via `create_symbol_filter_list`, which also receives `obj_dir`),
/// and that list is then passed to `create_filtered_merged_macho_object`
/// together with the objects to merge. Errors from either step are returned
/// to the caller.
pub fn merge_required_macho_objects(
obj_dir: &Path,
merged_path: &Path,
objects: &HashMap<PathBuf, ObjectSyms>,
objects: &[PathBuf],
keep_or_remove: ArmergeKeepOrRemove,
regexes: &[Regex],
) -> Result<(), MergeError> {
let filter_path = create_symbol_filter_list(obj_dir, objects.keys(), keep_or_remove, regexes)?;
create_filtered_merged_macho_object(merged_path, objects.keys(), &filter_path)
let filter_path = create_symbol_filter_list(obj_dir, objects, keep_or_remove, regexes)?;
create_filtered_merged_macho_object(merged_path, objects, &filter_path)
}

pub fn merge_required_objects(
obj_dir: &Path,
merged_path: &Path,
objects: &HashMap<PathBuf, ObjectSyms>,
objects: &[PathBuf],
keep_or_remove: ArmergeKeepOrRemove,
regexes: &[Regex],
) -> Result<(), MergeError> {
let filter_path = create_symbol_filter_list(obj_dir, objects.keys(), keep_or_remove, regexes)?;
create_filtered_merged_object(merged_path, objects.keys(), &filter_path)?;
let filter_path = create_symbol_filter_list(obj_dir, objects, keep_or_remove, regexes)?;
create_filtered_merged_object(merged_path, objects, &filter_path)?;

// If a symbol we localize is in a COMDAT section group, we also want to turn it into a regular
// section group. Otherwise the local symbol is not really local, because the containing section
Expand Down

0 comments on commit badafe3

Please sign in to comment.