Skip to content

Commit

Permalink
refactor sync-search-indices (github#29152)
Browse files Browse the repository at this point in the history
* refactor sync-search-indices

* tidying
  • Loading branch information
peterbe authored Jul 19, 2022
1 parent b9baf14 commit 44a91fd
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 84 deletions.
9 changes: 0 additions & 9 deletions .github/workflows/sync-search-indices.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,6 @@ jobs:
env:
VERSION: ${{ github.event.inputs.version }}
LANGUAGE: ${{ github.event.inputs.language }}
# We don't want or need the changelog entries in this context.
# Pages that display the content from these aren't included
# in search index bodies anyway.
CHANGELOG_DISABLED: true
# If a reusable, or anything in the `data/*` directory is deleted
# you might get a
#
Expand All @@ -89,11 +85,6 @@ jobs:
# But that'll get fixed in the next translation pipeline. For now,
# let's just accept an empty string instead.
THROW_ON_EMPTY: false
# Because the overload protection runs in NODE_ENV==production
# and it can break the sync-search.
DISABLE_OVERLOAD_PROTECTION: true
# Render caching won't help when we visit every page exactly once.
DISABLE_RENDERING_CACHE: true

run: npm run sync-search

Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/sync-search-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,4 @@ jobs:
# Set filtered to only these so it doesn't run for too long.
LANGUAGE: en
VERSION: free-pro-team@latest
# Because the overload protection runs in NODE_ENV==production
# and it can break the sync-search.
DISABLE_OVERLOAD_PROTECTION: true
run: npm run sync-search
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@
"start-all-languages": "cross-env NODE_ENV=development nodemon server.mjs",
"sync-search": "cross-env NODE_OPTIONS='--max_old_space_size=8192' start-server-and-test sync-search-server 4002 sync-search-indices",
"sync-search-ghes-release": "cross-env GHES_RELEASE=1 start-server-and-test sync-search-server 4002 sync-search-indices",
"sync-search-indices": "script/sync-search-indices.js",
"sync-search-server": "cross-env NODE_ENV=production PORT=4002 node server.mjs",
"sync-search-indices": "script/search/sync-search-indices.js",
"sync-search-server": "cross-env NODE_ENV=production PORT=4002 CHANGELOG_DISABLED=true DISABLE_RENDERING_CACHE=true DISABLE_OVERLOAD_PROTECTION=true node server.mjs",
"translation-check": "start-server-and-test translation-check-server 4002 translation-check-test",
"translation-check-server": "cross-env NODE_ENV=test PORT=4002 node server.mjs",
"translation-check-test": "script/i18n/test-html-pages.js",
Expand Down
6 changes: 3 additions & 3 deletions script/search/lunr-search-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ export default class LunrIndex {
return Object.fromEntries(this.records.map((record) => [record.objectID, record]))
}

async write() {
async write(outDirectory = path.posix.join(__dirname, '../../lib/search/indexes')) {
this.build()

// Write the parsed records
Expand All @@ -87,7 +87,7 @@ export default class LunrIndex {
.then(compress)
.then((content) =>
fs.writeFile(
path.posix.join(__dirname, '../../lib/search/indexes', `${this.name}-records.json.br`),
path.join(outDirectory, `${this.name}-records.json.br`),
content
// Do not set to 'utf8'
)
Expand All @@ -99,7 +99,7 @@ export default class LunrIndex {
.then(compress)
.then((content) =>
fs.writeFile(
path.posix.join(__dirname, '../../lib/search/indexes', `${this.name}.json.br`),
path.join(outDirectory, `${this.name}.json.br`),
content
// Do not set to 'utf8'
)
Expand Down
99 changes: 99 additions & 0 deletions script/search/sync-search-indices.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env node

// [start-readme]
//
// This script is run automatically via GitHub Actions on every push to `main` to generate searchable data.
// It can also be run manually. For more info see [contributing/search.md](contributing/search.md)
//
// [end-readme]

import path from 'path'

import { program, Option } from 'commander'

import { languageKeys } from '../../lib/languages.js'
import { allVersionKeys } from '../../lib/all-versions.js'
import searchSync from './sync.js'

// Relative directory the generated files are written to unless
// `--out-directory` is given on the command line.
const DEFAULT_OUT_DIRECTORY = path.join('lib', 'search', 'indexes')

// Command-line interface. `--version`, `--language` and `--not-language`
// only accept values from the known version/language key lists, so a typo
// is rejected by the option parser before `main` runs.
program
  .description('Creates search records (and Lunr indexes) by scraping')
  .option('-v, --verbose', 'Verbose outputs')
  .addOption(new Option('-V, --version <VERSION>', 'Specific versions').choices(allVersionKeys))
  .addOption(
    new Option('-l, --language <LANGUAGE>', 'Which languages to focus on').choices(languageKeys)
  )
  .addOption(
    new Option('--not-language <LANGUAGE>', 'Specific language to omit').choices(languageKeys)
  )
  .option('-d, --dry-run', 'Does not write to disk')
  .option(
    '-o, --out-directory <DIRECTORY>',
    // The default is only mentioned in the help text here; it is actually
    // applied inside `main` when the option is absent.
    `Where to dump the created files (default ${DEFAULT_OUT_DIRECTORY})`
  )
  .parse(process.argv)

main(program.opts())

/**
 * Work out the effective settings from the parsed command-line options and
 * the LANGUAGE, VERSION and DRY_RUN environment variables, then run the
 * search sync with them.
 *
 * A command-line option always wins over its environment variable; for
 * language and version a warning is printed when both are present. For
 * LANGUAGE and VERSION the special value 'all' (or an unset/empty variable)
 * means "do not filter".
 *
 * @param {object} opts - Parsed options, as returned by `program.opts()`.
 *   Keys read here: `language`, `notLanguage`, `version`, `dryRun`,
 *   `outDirectory`.
 * @returns {Promise<void>} Settles when the sync has finished.
 * @throws {Error} If LANGUAGE or VERSION is set to an unrecognized value,
 *   or if a language and `--not-language` are both in effect.
 * @throws {SyntaxError} If DRY_RUN is set to something that is not valid JSON.
 */
async function main(opts) {
  let language
  if ('language' in opts) {
    language = opts.language
    if (process.env.LANGUAGE) {
      console.warn(
        `'language' specified as argument ('${language}') AND environment variable ('${process.env.LANGUAGE}')`
      )
    }
  } else {
    if (process.env.LANGUAGE && process.env.LANGUAGE !== 'all') {
      language = process.env.LANGUAGE
      // Values from the command line are validated by the option parser;
      // values from the environment have to be validated here.
      if (!languageKeys.includes(language)) {
        throw new Error(
          `Environment variable 'LANGUAGE' (${language}) is not recognized. Must be one of ${languageKeys}`
        )
      }
    }
  }
  const notLanguage = opts.notLanguage
  // Including exactly one language and excluding one are contradictory
  // requests, no matter whether the language came from a flag or from
  // the environment.
  if (notLanguage && language) {
    throw new Error("Can't specify --language *and* --not-language")
  }

  let version
  if ('version' in opts) {
    version = opts.version
    if (process.env.VERSION) {
      console.warn(
        `'version' specified as argument ('${version}') AND environment variable ('${process.env.VERSION}')`
      )
    }
  } else {
    if (process.env.VERSION && process.env.VERSION !== 'all') {
      version = process.env.VERSION
      if (!allVersionKeys.includes(version)) {
        throw new Error(
          `Environment variable 'VERSION' (${version}) is not recognized. Must be one of ${allVersionKeys}`
        )
      }
    }
  }

  let dryRun = false
  if ('dryRun' in opts) {
    dryRun = opts.dryRun
  } else {
    // DRY_RUN is parsed as JSON, so `true`, `false`, `1` and `0` all work;
    // an unset or empty variable counts as false.
    dryRun = Boolean(JSON.parse(process.env.DRY_RUN || 'false'))
  }

  const outDirectory = opts.outDirectory || DEFAULT_OUT_DIRECTORY

  const options = {
    dryRun,
    language,
    notLanguage,
    version,
    outDirectory,
  }
  await searchSync(options)
}
49 changes: 17 additions & 32 deletions script/search/sync.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,44 +6,29 @@ import { allVersions } from '../../lib/all-versions.js'
import { namePrefix } from '../../lib/search/config.js'
import LunrIndex from './lunr-search-index.js'

// Lunr

// Build a search data file for every combination of product version and language
// e.g. `github-docs-dotcom-en.json` and `github-docs-2.14-ja.json`
export default async function syncSearchIndexes(opts = {}) {
export default async function syncSearchIndexes({
language,
version,
dryRun,
notLanguage,
outDirectory,
}) {
const t0 = new Date()
if (opts.language) {
if (!Object.keys(languages).includes(opts.language)) {
console.log(
`Error! ${opts.language} not found. You must provide a currently supported two-letter language code.`
)
process.exit(1)
}
}

if (opts.version) {
if (!Object.keys(allVersions).includes(opts.version)) {
console.log(`${opts.version} not one of ${Object.keys(allVersions)}`)
throw new Error(
`Error! ${opts.version} not found. You must provide a currently supported version in <PLAN@RELEASE> format.`
)
}
}

// build indices for a specific language if provided; otherwise build indices for all languages
const languagesToBuild = opts.language
? Object.keys(languages).filter((language) => language === opts.language)
: Object.keys(languages)
const languagesToBuild = Object.keys(languages).filter((lang) =>
notLanguage ? notLanguage !== lang : language ? language === lang : true
)

// build indices for a specific version if provided; otherwise build indices for all veersions
const versionsToBuild = opts.version
? Object.keys(allVersions).filter((version) => version === opts.version)
: Object.keys(allVersions)
// build indices for a specific version if provided; otherwise build indices for all versions
const versionsToBuild = Object.keys(allVersions).filter((ver) =>
version ? version === ver : true
)

console.log(
`Building indices for ${opts.language || 'all languages'} and ${
opts.version || 'all versions'
}.\n`
`Building indices for ${language || 'all languages'} and ${version || 'all versions'}.\n`
)

// Exclude WIP pages, hidden pages, index pages, etc
Expand Down Expand Up @@ -84,8 +69,8 @@ export default async function syncSearchIndexes(opts = {}) {
)
const index = new LunrIndex(indexName, records)

if (!opts.dryRun) {
await index.write()
if (!dryRun) {
await index.write(outDirectory)
console.log('wrote index to file: ', indexName)
}
}
Expand Down
35 changes: 0 additions & 35 deletions script/sync-search-indices.js

This file was deleted.

0 comments on commit 44a91fd

Please sign in to comment.