Skip to content

Commit

Permalink
Merge pull request #379 from extractus/dev
Browse files Browse the repository at this point in the history
v8.0.5
  • Loading branch information
ndaidong authored Jan 22, 2024
2 parents 986a409 + b438895 commit 660bc92
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 24 deletions.
19 changes: 16 additions & 3 deletions .github/workflows/ci-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
node_version: [18.x, 20.x, 21.x]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: setup Node.js v${{ matrix.node_version }}
uses: actions/setup-node@v4
Expand All @@ -31,15 +31,28 @@ jobs:
npm run build --if-present
npm run test
- name: Report Coveralls
- name: Coveralls Parallel
uses: coverallsapp/github-action@v2
with:
flag-name: run-${{ join(matrix.*, '-') }}
parallel: true
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: cache node modules
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
finish:
needs: test
if: ${{ always() }}
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
uses: coverallsapp/github-action@v2
with:
parallel-finished: true
carryforward: "run-18.x,run-20.x,run-21.x"
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "8.0.4",
"version": "8.0.5",
"name": "@extractus/article-extractor",
"description": "To extract main article from given URL",
"homepage": "https://github.com/extractus/article-extractor",
Expand Down Expand Up @@ -30,18 +30,18 @@
"reset": "node reset"
},
"dependencies": {
"@mozilla/readability": "^0.4.4",
"@mozilla/readability": "^0.5.0",
"bellajs": "^11.1.2",
"cross-fetch": "^4.0.0",
"linkedom": "^0.16.4",
"linkedom": "^0.16.6",
"sanitize-html": "2.11.0"
},
"devDependencies": {
"@types/sanitize-html": "^2.9.5",
"eslint": "^8.55.0",
"eslint": "^8.56.0",
"https-proxy-agent": "^7.0.2",
"jest": "^29.7.0",
"nock": "^13.4.0"
"nock": "^13.5.0"
},
"keywords": [
"article",
Expand Down
39 changes: 23 additions & 16 deletions src/utils/extractLdSchema.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ const attributeLists = {
type: '@type',
}

/**
 * Best-effort JSON parser: never throws on malformed input.
 *
 * @param {string} text - raw JSON text to decode
 * @returns {*} the decoded value, or null when JSON.parse rejects the input
 */
const parseJson = (text) => {
  let parsed = null
  try {
    parsed = JSON.parse(text)
  } catch {
    // invalid JSON is reported to the caller as null rather than as an exception
  }
  return parsed
}

/**
* Parses JSON-LD data from a document and populates an entry object.
* Only populates a field when that field on the entry object is undefined or an empty string.
Expand All @@ -46,26 +54,25 @@ const attributeLists = {
export default (document, entry) => {
const ldSchema = document.querySelector('script[type="application/ld+json"]')?.textContent

if (!ldSchema) {
return entry
}
const ldJson = ldSchema ? parseJson(ldSchema) : null

const ldJson = JSON.parse(ldSchema)
Object.entries(attributeLists).forEach(([key, attr]) => {
if ((typeof entry[key] === 'undefined' || entry[key] === '') && ldJson[attr]) {
if (key === 'type' && typeof ldJson[attr] === 'string') {
return entry[key] = typeSchemas.includes(ldJson[attr].toLowerCase()) ? ldJson[attr].toLowerCase() : ''
}
if (ldJson) {
Object.entries(attributeLists).forEach(([key, attr]) => {
if ((typeof entry[key] === 'undefined' || entry[key] === '') && ldJson[attr]) {
if (key === 'type' && typeof ldJson[attr] === 'string') {
return entry[key] = typeSchemas.includes(ldJson[attr].toLowerCase()) ? ldJson[attr].toLowerCase() : ''
}

if (typeof ldJson[attr] === 'string') {
return entry[key] = ldJson[attr].toLowerCase()
}
if (typeof ldJson[attr] === 'string') {
return entry[key] = ldJson[attr].toLowerCase()
}

if (Array.isArray(ldJson[attr]) && typeof ldJson[attr][0] === 'string') {
return entry[key] = ldJson[attr][0].toLowerCase()
if (Array.isArray(ldJson[attr]) && typeof ldJson[attr][0] === 'string') {
return entry[key] = ldJson[attr][0].toLowerCase()
}
}
}
})
})
}

return entry
}

0 comments on commit 660bc92

Please sign in to comment.