forked from leanprover-community/mathlib4
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUnused.lean
133 lines (111 loc) · 5.61 KB
/
Unused.lean
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/-
Copyright (c) 2024 Lean FRO. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Kim Morrison
-/
import Cli
import Batteries.Data.List.Basic
import ImportGraph.Imports
import LongestPole.Rectangles
import Mathlib.Util.FormatTable
/-!
# `lake exe unused`
A tool for library-scale analysis of unused imports.
`lake exe unused module_1 module_2 ... module_n` will generate a markdown table with rows and
columns indexed 1 through n, with an `x` in `(i, j)` if `module_i` transitively imports `module_j`,
but makes no direct use of the declarations there.
(This is often an indication that some *intermediate* file `module_k` could be split into two parts,
one that needs `module_j`, and another that is needed by `module_i`, thereby removing the transitive
import of `module_j` into `module_i`.)
The tool further identifies large rectangles of `x`s in this table, and suggests `lake exe graph`
commands that can show the dependency structure from the unused import up to the earliest file which
"unnecessarily" transitively imports it.
`scripts/unused_in_pole.sh module` (or no argument for all of Mathlib), will calculate the current
longest pole in Mathlib (note for this you need to be on a commit on which the speed center has
run), and then feed that list of modules into `lake exe unused`.
Demo video at https://youtu.be/PVj_FHGwhUI
-/
open Cli
open Lean
/-- Count the number of declarations in each module. -/
def countDecls (modules : Array Name) : CoreM (Array Nat) := do
let env ← getEnv
let mut counts := Array.mkArray modules.size 0
let moduleIndices := Std.HashMap.ofList <| modules.zipWithIndex.toList
for (n, _) in env.constants.map₁ do
if ! n.isInternal then
if let some m := env.getModuleFor? n then
if let some i := moduleIndices[m]? then
counts := counts.modify i (· + 1)
return counts
/-- Implementation of `lake exe unused` -/
def unusedImportsCLI (args : Cli.Parsed) : IO UInt32 := do
let output := args.flag! "output" |>.as! String
let n := args.flag! "n" |>.as! Nat
let modules := (args.variableArgsAs! ModuleName).toList
if modules.isEmpty then
IO.eprintln
"No modules specified, please specify at least two modules on the command line."
return 1
-- Should we sort the modules?
-- The code below assumes that it is "deeper files first", as reported by `lake exe pole`.
searchPathRef.set compile_time_search_path%
-- It may be reasonable to remove this again after https://github.com/leanprover/lean4/pull/6325
unsafe enableInitializersExecution
let (unused, _) ← unsafe withImportModules #[{module := `Mathlib}] {} (trustLevel := 1024)
fun env => Prod.fst <$> Core.CoreM.toIO
(ctx := { fileName := "<CoreM>", fileMap := default }) (s := { env := env }) do
let unused ← unusedTransitiveImports modules
let counts ← countDecls modules.toArray
return (unused, counts)
let headings := #["#", "module"] ++ ((List.range' 1 modules.length).map toString)
let rows := modules.mapIdx fun i m =>
let data := (unused.lookup m).getD []
#[toString (i + 1), m.toString] ++
modules.map fun m' => if data.contains m' then "x" else ""
IO.println s!"Writing table to {output}."
IO.FS.writeFile output (formatTable headings rows.toArray)
let data := unused.flatMap fun (m, u) => u.map fun n => (modules.indexOf m, modules.indexOf n)
let rectangles := maximalRectangles data
|>.map (fun r => (r, r.area))
-- Prefer rectangles with larger areas.
|>.mergeSort (fun r₁ r₂ => r₁.2 > r₂.2)
-- The `lake exe graph` command we print only depends on the top-right corner, so deduplicate.
|>.pwFilter (fun r₁ r₂ => (r₁.1.top, r₂.1.right) ≠ (r₂.1.top, r₁.1.right))
|>.take n
for (i, (r, _)) in rectangles.enum do
-- We use `--from top` so that the graph starts at the module immediately *before*
-- the block of unused imports. This is useful for deciding how a split should be made.
-- We use `--to (right-1)` so that the graph ends at the earliest of the modules
-- which do not make use of any of the unused block.
-- Note that this means that the later modules which do not use the block are not displayed,
-- and in particular it is not possible to see from the graph the size of the later block
-- which makes no use of the earlier unused block.
-- Perhaps modifying `to` to allow multiple modules, and highlighting all of these in some way,
-- would be a useful addition.
let from_idx := min r.top (modules.length - 1)
let to_idx := r.right - 1
IO.println
s!"lake exe graph --from {modules[from_idx]!} --to {modules[to_idx]!} unused{i}.pdf"
return 0
/-- Setting up command line options and help text for `lake exe unused`. -/
def unused : Cmd := `[Cli|
unused VIA unusedImportsCLI; ["0.0.1"]
"Determine unused imports amongst a given set of modules.\n\
Produces a table with rows and columns indexed by the specified modules,\n\
with an 'x' in row A, column B if module A imports module B,\n\
but does not make any transitive use of constants defined in B.
This table is written to `unused.md`, and a number of `lake exe graph` commands are printed
to visualize the largest rectangles of unused imports."
FLAGS:
output : String; "Write the table to a given file instead of `unused.md`."
n : Nat; "Number of `lake exe graph` commands to print."
ARGS:
...modules : ModuleName; "Modules to check for unused imports."
EXTENSIONS:
author "mhuisi";
defaultValues! #[("output", "unused.md"), ("n", "10")]
]
/-- `lake exe unused` -/
def main (args : List String) : IO UInt32 :=
unused.validate args