Skip to content

Commit

Permalink
feat: use wikipedia wrapper instead of axios
Browse files Browse the repository at this point in the history
Signed-off-by: Ruihang Xia <[email protected]>
  • Loading branch information
waynexia committed May 18, 2024
1 parent 91727d5 commit 5509f32
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 57 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CI

on:
push:
branches: [ '*' ]
branches: ['*']
pull_request:
branches: [ '*' ]
branches: ['*']

jobs:
test:
Expand Down
5 changes: 2 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@
"test": "vitest"
},
"dependencies": {
"axios": "^1.6.8",
"cheerio": "1.0.0-rc.12",
"htmlparser2": "^9.1.0",
"react": "^18.3.1",
"react-dom": "^18.3.1"
"react-dom": "^18.3.1",
"wikipedia": "^2.1.2"
},
"devDependencies": {
"@antfu/eslint-config": "^2.16.1",
Expand Down
43 changes: 27 additions & 16 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ import { useEffect, useState } from 'react'
import { build_list } from './list'

function App() {
// const [list, setList] = useState([])
// build_list(100, 2000).then(setList)

const [list, setList] = useState<{ from: number, to: number, person: { desc: string, link: string | undefined, death: number | undefined }, other_people: { desc: string, link: string | undefined, death: number | undefined }[] }[]>([])

useEffect(() => {
Expand Down
19 changes: 4 additions & 15 deletions src/fetch.test.tsx
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import nock from 'nock'
import { describe, expect, it } from 'vitest'
import { fetch_people_in } from './fetch'
import { extract_people_from_html } from './fetch'

describe('fetch_people_in', () => {
it('call fetch_people_in with 1990', async () => {
const year = 1990
describe('extract_people_from_html', () => {
it('call extract_people_from_html with 100', async () => {
// scraped from https://en.wikipedia.org/wiki/AD_100
const mockData = `
<div>
Expand All @@ -29,16 +27,7 @@ describe('fetch_people_in', () => {
<li><a href="/wiki/Wang_Chong" title="Wang Chong">Wang Chong</a>, Chinese philosopher (b. <a href="/wiki/AD_27" title="AD 27">AD 27</a>)</li></ul>
</div>
`

nock('https://en.wikipedia.org')
.defaultReplyHeaders({
'access-control-allow-origin': '*',
'access-control-allow-credentials': 'true',
})
.get(`/wiki/AD_${year}`)
.reply(200, mockData)

const result = await fetch_people_in(year)
const result = extract_people_from_html(mockData)

expect(result).toEqual(
[{
Expand Down
35 changes: 18 additions & 17 deletions src/fetch.tsx
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import axios from 'axios'
import * as cheerio from 'cheerio'

const WIKIPEDIA_AD_URL = 'https://en.wikipedia.org/wiki/AD_'
// const WIKIPEDIA_AD_URL = 'https://cors-anywhere.herokuapp.com/https://en.wikipedia.org/wiki/AD_'
import wiki from 'wikipedia'

export async function fetch_people_in(year: number) {
return axios.get(WIKIPEDIA_AD_URL + year).then((response) => {
const $ = cheerio.load(response.data)
const birthsHeader = $('#Births').parent()
const births = birthsHeader.nextUntil('h2').find('li').map((_, el) => {
const desc = $(el).text()
const link = $(el).find('a').attr('href')
const deathMatch = desc.match(/\(d\. \d+/i)?.[0]
const death = deathMatch ? Number.parseInt(deathMatch.substring(3)) : undefined
return { desc, link, death }
},
).get()
return births
})
const page = await wiki.page(`AD_${year}`)
const html = await page.html({ redirect: true })
return extract_people_from_html(html)
}

/**
 * Collects the list entries found under the "Births" heading of a page's HTML.
 *
 * The heading is located through the element with id `Births`; every `<li>`
 * inside the siblings that follow that element's parent, up to the next `<h2>`,
 * becomes one entry.
 *
 * @param html - HTML markup to scan (the caller in this file passes the HTML of
 *   a Wikipedia `AD_<year>` page).
 * @returns One object per list item:
 *   - `desc`: the item's full text content,
 *   - `link`: the `href` of the first anchor inside the item, if any,
 *   - `death`: the number following a `(d. ` marker in the text, or `undefined`
 *     when the text has no such marker.
 */
export function extract_people_from_html(html: string) {
  const $ = cheerio.load(html)

  // `#Births` is nested inside the section heading, so step up to the heading
  // itself and gather everything between it and the next top-level heading.
  const sectionContent = $('#Births').parent().nextUntil('h2')

  return sectionContent.find('li').toArray().map((item) => {
    const entry = $(item)
    const desc = entry.text()
    const link = entry.find('a').attr('href')

    // Capture only the digits that follow "(d. "; matching is case-insensitive.
    const deathYear = /\(d\. (\d+)/i.exec(desc)?.[1]
    const death = deathYear === undefined ? undefined : Number.parseInt(deathYear)

    return { desc, link, death }
  })
}
2 changes: 1 addition & 1 deletion src/list.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export async function build_list(start_year: number, end_year: number) {
other_people: people_born,
})

year = random_person.death!
year = random_person.death! + 500
}

return list
Expand Down

0 comments on commit 5509f32

Please sign in to comment.