-
Notifications
You must be signed in to change notification settings - Fork 0
80 lines (69 loc) · 3.13 KB
/
scrape.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Scheduled workflow that re-scrapes canonical IETF texts (the TLP licence
# boilerplate and the Note Well) and regenerates the Markdown copies kept in
# this repository.
name: Scrape canonical sources

on:
  schedule:
    # Once a day at 04:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 4 * * *"

# Workflow-level environment, visible to every `run` step.
env:
  # Previously spelled "ENV DEBIAN_FRONTEND" (a Dockerfile-style leftover).
  # That is a different, invalid variable name, so DEBIAN_FRONTEND itself was
  # never set by this workflow.
  DEBIAN_FRONTEND: noninteractive

  # Page to scrape for the licence text. The *_START / *_END values are
  # extended regular expressions (used with `sed -E`): everything up to and
  # including the START line, and everything from the END line onwards, is
  # dropped from the text dump.
  TLP_URL: https://trustee.ietf.org/assets/licenses/license-for-open-source-repositories/
  TLP_START: "^Official boilerplate text:"
  TLP_END: "^Search for:"

  # Same scheme for the Note Well page.
  NOTEWELL_URL: https://www.ietf.org/about/note-well/
  NOTEWELL_START: "^A reminder of IETF policies."
  NOTEWELL_END: "^--------"
jobs:
  # Single job: install tooling, check out the repo, regenerate the Markdown
  # files from their upstream pages and open a PR if anything changed.
  scrape:
    runs-on: ubuntu-latest
    steps:
      # w3m dumps the web pages as plain text; pandoc writes the final
      # GitHub-flavoured Markdown.
      - name: Install dependencies
        run: |
          # Refresh the package index first: the index baked into the runner
          # image can be stale, which makes `apt-get install` fail with 404s.
          sudo apt-get update
          sudo apt-get -y install --no-install-recommends w3m pandoc
      # NOTE(review): actions/checkout@v3 is an older major version; consider
      # bumping once the newer release has been verified for this repository.
      - name: Checkout repository
        uses: actions/checkout@v3
# Regenerates LICENSE.md from the text published at $TLP_URL.
- name: Scrape TLP
  run: |
    # Steps without an explicit `shell:` run under `bash -e` only. Enable
    # pipefail so a failed download aborts the step instead of feeding an
    # empty stream to the rest of the pipeline.
    set -o pipefail

    # Dump the page as unwrapped plain text and keep only the lines strictly
    # between the $TLP_START and $TLP_END marker lines.
    w3m -no-graph -cols 999999 -dump "$TLP_URL" |
      sed -E -e "1,/$TLP_START/d; /$TLP_END/,\$d" > /tmp/tlp.txt

    # An empty extract means the fetch failed or the markers no longer match
    # the page. Stop here rather than proposing a PR that blanks LICENSE.md.
    if [ ! -s /tmp/tlp.txt ]; then
      echo "::error::No text extracted from $TLP_URL; refusing to overwrite LICENSE.md"
      exit 1
    fi

    # Normalise whitespace, apply wording and URL fixes, turn "Name (url)"
    # pairs into Markdown links, prepend a heading, and let pandoc emit
    # GitHub-flavoured Markdown.
    sed -E -e "s|\s+| |g" \
      -e "s|http:|https:|g" \
      -e "s|e.g. |e.g., |g" \
      -e "s|policies of IETF|policies of the IETF|g" \
      -e "s|Simplified BSD|Revised BSD|g" \
      -e "s|(IETF Trust Legal Provisions \(TLP\) Relating to IETF Documents) \(|[\1](|g" \
      -e "s|(IETF Standards Process) \(|[\1](|g" \
      -e "s|(BCP [0-9]+) \(|[\1](|g" \
      -e "s|www.ietf.org/|www.rfc-editor.org/info/bcp9|g" \
      -e "s|(Internet Engineering Task Force \(IETF\))|[\1](\https://www.ietf.org/)|g" /tmp/tlp.txt |
      sed -E -e "1s/^/# License\n/" |
      pandoc -f markdown -t gfm+smart > LICENSE.md

    # Echo the result into the job log for inspection.
    cat LICENSE.md
# Regenerates NOTE-WELL.md from the text published at $NOTEWELL_URL.
- name: Scrape Note Well
  run: |
    # Steps without an explicit `shell:` run under `bash -e` only. Enable
    # pipefail so a failed download aborts the step instead of feeding an
    # empty stream to the rest of the pipeline.
    set -o pipefail

    # Dump the page as unwrapped plain text and keep only the lines strictly
    # between the $NOTEWELL_START and $NOTEWELL_END marker lines.
    w3m -no-graph -cols 999999 -dump "$NOTEWELL_URL" |
      sed -E -e "1,/$NOTEWELL_START/d; /$NOTEWELL_END/,\$d" > /tmp/notewell.txt

    # An empty extract means the fetch failed or the markers no longer match
    # the page. Stop here rather than proposing a PR that blanks NOTE-WELL.md.
    if [ ! -s /tmp/notewell.txt ]; then
      echo "::error::No text extracted from $NOTEWELL_URL; refusing to overwrite NOTE-WELL.md"
      exit 1
    fi

    # Normalise whitespace, link the first "IETF policies" on each line,
    # link the ombudsteam, BCP numbers and the privacy policy, convert
    # bullet glyphs to Markdown list markers, prepend a heading, and let
    # pandoc emit GitHub-flavoured Markdown.
    sed -E -e "s|\s+| |g" \
      -e "s|IETF policies|[Internet Engineering Task Force (IETF)](https://www.ietf.org/) policies|" \
      -e "s|(ombudsteam) \(|[\1](|g" \
      -e "s|BCP ([0-9]+)|[BCP \1](https://www.rfc-editor.org/info/bcp\1)|g" \
      -e "s|https://www.ietf.org/privacy-policy/.*|[Privacy Policy](https://www.ietf.org/privacy-policy/)|g" \
      -e "s|\s+•|*|g" /tmp/notewell.txt |
      sed -E -e "1s/^/# Note Well\n/" |
      pandoc -f markdown -t gfm+smart > NOTE-WELL.md

    # Echo the result into the job log for inspection.
    cat NOTE-WELL.md
# Log the working-tree state and the textual delta of the regenerated files.
- name: Check diff
  run: git status && git diff
# Opens (or updates) a pull request carrying whatever the scrape steps changed.
- name: Create PR
  # The id is required for the `steps.cpr.outputs.*` expressions in the
  # "Check outputs" step; without it they evaluate to empty strings.
  id: cpr
  uses: peter-evans/create-pull-request@v3
  with:
    commit-message: Update Markdown from canonical sources
    branch: update-from-canonical-sources
    delete-branch: true
    base: main
    title: 'Update Markdown from canonical sources'
    body: |
      Changes were detected to the canonical sources of some of the files in this repository.
      This PR was auto-generated by a GitHub Action using [create-pull-request](https://github.com/peter-evans/create-pull-request).
# Prints the number and URL reported by the "Create PR" step (id: cpr).
- name: Check outputs
  run: |
    echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
    echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"