-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate.py
executable file
·267 lines (213 loc) · 8.78 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
"""Generate reference documentation for Weights & Biases.
Creates docs for the Weights & Biases SDK library and for the wandb CLI tool.
For help, run:
python generate.py --help
"""
import argparse
import os
import shutil
import wandb
import cli
import library
import re
# Maps an auto-generated title (key) to the preferred title (value).
# NOTE: Hugo now performs this replacement. In the future, remove this mapping
# together with the function that uses it.
MARKDOWN_TITLES = {
    "python": "Python Library",
    "data-types": "Data Types",
    "public-api": "Import & Export API",
    "integrations": "Integrations",
    "ref": "Reference",
    "keras": "Keras",
    "weave": "Weave",
}

# Maps a frontmatter title as generated (key) to the title that should replace
# it (value) when README.md / _index.md files are post-processed.
title_mapping = {
    "python": "Python Library",
    "data-types": "Data Types",
    "integrations": "Integrations",
    "public-api": "Import & Export API",
    "wandb": "Command Line Interface",
}
def main(args):
    """Generate the SDK and CLI reference docs and post-process them for Hugo.

    Args:
        args: Parsed command-line arguments providing `commit_id`,
            `output_dir`, `repo` and `prefix` (see `get_args`).
    """
    commit_id = args.commit_id
    # Reject a malformed commit id before anything is deleted or built.
    check_commit_id(commit_id)

    output_dir = args.output_dir
    # Link target for source code: <repo>/tree/<commit_id>/<prefix>
    url_parts = (args.repo, "tree", f"{commit_id}", args.prefix)
    code_url_prefix = "/".join(url_parts)
    ref_dir = os.path.join(output_dir, library.DIRNAME)

    # Remove previously generated folders, except skipped/external ones.
    # NOTE(review): the loop target is the attribute `library.dirname`, so every
    # iteration rebinds that attribute on the `library` module. This looks
    # accidental but is preserved here -- confirm nothing in `library` reads it
    # before switching to a plain local variable.
    for library.dirname in library.DIRNAMES_TO_TITLES.keys():
        keep = library.dirname in library.SKIPS or library.dirname in library.EXTERNAL
        if not keep:
            stale_dir = os.path.join(ref_dir, library.dirname)
            shutil.rmtree(stale_dir, ignore_errors=True)

    # Build the library docs, then convert the .build output to a format
    # docodile can use.
    library.build(commit_id, code_url_prefix, output_dir)
    rename_to_readme(ref_dir)

    # Build the CLI docs.
    print("Building CLI....")
    cli.build(ref_dir)

    # Post-processing passes that only need the reference directory, in order:
    #   1. folders holding a single README become <folder>.md
    #   2. file names are cleaned up
    #   3. the leading title line becomes Hugo frontmatter
    for post_process in (
        single_folder_format,
        clean_names,
        reformat_title_to_frontmatter,
    ):
        post_process(ref_dir)

    # Finally rename README.md to _index.md and apply the preferred titles.
    reformat_and_rename_readme(ref_dir, title_mapping)
def rename_to_readme(directory):
    """Move each `<name>.md` that sits next to a `<name>/` folder to `<name>/README.md`.

    After the move, the new README is handed to `library.format_readme_titles`
    together with `MARKDOWN_TITLES` so its title can be set to the preferred one.

    Args:
        directory: str. The directory to walk through.
    """
    for root, folders, file_names in os.walk(directory):
        for file_name in file_names:
            # Strip the three-character ".md" suffix to get the candidate folder name.
            stem = file_name[:-3]
            if not file_name.endswith(".md") or stem not in folders:
                continue

            readme_path = os.path.join(root, stem, "README.md")
            os.rename(os.path.join(root, file_name), readme_path)
            # Format README doc titles to the preferred title.
            library.format_readme_titles(readme_path, MARKDOWN_TITLES)
def clean_names(directory):
    """Normalize file names: lower-case them and replace spaces with hyphens.

    Files named exactly `README.md` keep their name. Only files are renamed;
    folder names are left as they are.

    Args:
        directory: str. The directory to walk through.
    """
    for root, _, file_names in os.walk(directory):
        for current in file_names:
            tidy = current if current == "README.md" else current.replace(" ", "-").lower()
            os.rename(os.path.join(root, current), os.path.join(root, tidy))
def reformat_title_to_frontmatter(directory):
    """Wrap a leading `title:` line in `---` fences, as required by Hugo.

    Every `.md` file under `directory` whose first line starts with `title:`
    is rewritten in place: the (stripped) title line is enclosed in frontmatter
    delimiters and the remaining lines are kept unchanged. Other files are
    not modified.

    Args:
        directory: str. The directory to walk through.
    """
    for root, _, files in os.walk(directory):
        for md_name in files:
            # Only process markdown files.
            if not md_name.endswith(".md"):
                continue

            md_path = os.path.join(root, md_name)
            with open(md_path, "r", encoding="utf-8") as src:
                content = src.readlines()

            # Skip empty files and files that do not open with a title line.
            if not content or not content[0].startswith("title:"):
                continue

            title_line = content[0].strip()
            with open(md_path, "w", encoding="utf-8") as dst:
                dst.write(f"---\n{title_line}\n---\n")
                dst.writelines(content[1:])
def single_folder_format(directory):
    """Collapse every leaf folder that holds only a README.md into a single file.

    So the tree:
      - folder
        - README.md
    changes to
      - folder.md

    Args:
      directory: str. The directory to walk through.
    """
    for root, folders, file_names in os.walk(directory):
        # Only folders with no sub-folders and README.md as their sole file qualify.
        if folders or file_names != ["README.md"]:
            continue

        folder_name = os.path.split(root)[-1]
        parent_root = os.path.abspath(os.path.join(root, ".."))
        source = os.path.join(root, "README.md")
        target = os.path.join(parent_root, f"{folder_name}.md")
        os.rename(source, target)
        # The folder is empty now, so it can be removed.
        os.rmdir(root)
def reformat_and_rename_readme(directory, title_mapping):
    """Clean every README.md / _index.md under `directory` and store it as _index.md.

    For each matching file:
      1. lines that begin with a `{{< cta-button ... >}}` shortcode are dropped,
      2. any `title:` line whose value is a key of `title_mapping` gets the
         mapped title instead,
      3. README.md is renamed to _index.md (an existing _index.md keeps its name),
      4. the result is written back and progress is printed.

    Args:
        directory: str. The directory to walk through.
        title_mapping: Mapping of current title -> replacement title.
    """
    cta_button = re.compile(r"\{\{\<\s*cta-button.*?\>\}\}")
    title_entry = re.compile(r"title:\s*(.+)")
    index_names = ("README.md", "_index.md")

    for root, _, files in os.walk(directory):
        for name in files:
            if name not in index_names:
                continue

            file_path = os.path.join(root, name)
            with open(file_path, "r", encoding="utf-8") as src:
                raw_lines = src.readlines()

            # Remove CTA buttons.
            cleaned_lines = [
                raw for raw in raw_lines if cta_button.match(raw.strip()) is None
            ]

            # Swap in the preferred title wherever a known title is found.
            for idx, raw in enumerate(cleaned_lines):
                found = title_entry.match(raw.strip())
                if found is None:
                    continue
                old_title = found.group(1).strip()
                if old_title in title_mapping:
                    new_title = title_mapping[old_title]
                    cleaned_lines[idx] = f"title: {new_title}\n"
                    print(f"Updated title: {old_title} -> {new_title}")

            # Rename README.md to _index.md before writing.
            if name == "README.md":
                new_file_path = os.path.join(root, "_index.md")
                os.rename(file_path, new_file_path)
                file_path = new_file_path

            # Write back the cleaned and updated file.
            with open(file_path, "w", encoding="utf-8") as dst:
                dst.writelines(cleaned_lines)

            print(f"Processed: {file_path}")
def get_args():
    """Parse the command-line arguments of the docs generator.

    Returns:
        argparse.Namespace with the attributes `commit_id`, `repo`, `prefix`
        and `output_dir`. `commit_id` has no default, so it is None when the
        flag is omitted.
    """
    parser = argparse.ArgumentParser(
        description="Generate documentation for the wandb library and CLI."
    )

    # The commit_id can be the complete git hash
    # or can be the tag for the version of code.
    # eg. HASH = https://github.com/wandb/wandb/tree/c129c32964aca6a8509d98a0cc3c9bc46f2d8a4c
    # eg. TAG = https://github.com/wandb/wandb/tree/v0.15.5
    parser.add_argument(
        "--commit_id",
        type=str,
        help=(
            "Hash/Tag for the git commit to base the docs on. "
            "Ensures that the source code is properly linked."
        ),
    )
    parser.add_argument(
        "--repo",
        type=str,
        default="https://www.github.com/wandb/wandb",
        help="Repo to link for source code. Defaults to wandb/wandb.",
    )
    parser.add_argument(
        "--prefix",
        type=str,
        default="wandb",
        help=(
            "Folder within GitHub repo where wandb SDK code is located. "
            "Defaults to wandb."
        ),
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default=os.getcwd(),
        # The middle literal must be an f-string: without the prefix, --help
        # printed the placeholder "{library.DIRNAME}" verbatim instead of the
        # actual folder name.
        help=(
            "Folder into which to place folder "
            f"{library.DIRNAME}/ containing results. "
            "Defaults to current directory."
        ),
    )
    return parser.parse_args()
def check_commit_id(commit_id):
    """Checks for a valid commit id.

    Accepted forms:
      - "latest": accepted as-is.
      - a version tag (anything containing "."): must equal
        f"v{wandb.__version__}".
      - anything else is treated as a git hash and must be 40 characters long.

    Args:
        commit_id: The commit id provided

    Raises:
        TypeError: If `commit_id` is None (e.g. --commit_id was not passed).
        AssertionError: If a version tag does not match the installed wandb
            version, or a git hash is not 40 characters long.
    """
    if commit_id is None:
        # Previously this surfaced as "argument of type 'NoneType' is not
        # iterable"; same exception type, but with an actionable message.
        raise TypeError("commit_id is required (pass --commit_id), got None")

    if commit_id == "latest":
        # using latest version instead of a commit id -- this should work as long as
        # we aren't hosting legacy doc versions -- if we do, we'll need to go back
        # to passing an actual id
        return

    # The checks below raise AssertionError explicitly instead of using `assert`
    # so they still run under `python -O`; the exception type is unchanged.
    if "." in commit_id:
        # commit_id is a version
        wandb_version = f"v{wandb.__version__}"
        if wandb_version != commit_id:
            raise AssertionError(
                f"git version {commit_id} does not match wandb version {wandb_version}"
            )
        return

    # commit_id is a git hash
    if len(commit_id) != 40:
        raise AssertionError(
            f"git hash must have all 40 characters, was {commit_id}"
        )
# Script entry point: parse the command-line arguments and run the generator.
if __name__ == "__main__":
    main(get_args())