-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun_event_data.py
106 lines (100 loc) · 4.06 KB
/
run_event_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from ames.harvesters import get_crossref_refs
from ames.harvesters import get_caltechdata
from ames.matchers import match_cd_refs
from xml.sax import saxutils as su
import os
import requests
from py_dataset import dataset
# Environment variable AWS_SDK_LOAD_CONFIG=1 must be set before running
def _find_contact_person(metadata):
    """Return ``(name, email)`` of the record's contact person.

    Looks through ``metadata["contributors"]`` for entries whose
    ``contributorType`` is ``"ContactPerson"`` and that carry a
    ``contributorEmail``. If several entries qualify the last one wins; if
    none does, both returned values are empty strings.
    """
    name = ""
    email = ""
    for contributor in metadata.get("contributors", []):
        if (
            contributor.get("contributorType") == "ContactPerson"
            and "contributorEmail" in contributor
        ):
            email = contributor["contributorEmail"]
            name = contributor.get("contributorName", "")
    return name, email


def _build_notice_html(name, title, doi, citation_block, matched_dois):
    """Assemble the HTML body of the "your work has been cited" email.

    The pieces are joined explicitly (instead of using backslash
    continuations inside one literal) so the markup does not depend on how
    the source file happens to be indented.
    """
    # NOTE(review): the "[email protected]" address text is reproduced exactly
    # as it appears in this copy of the file; it looks redacted - confirm the
    # real address before deploying.
    parts = [
        '<html> <center> <img src="cid:CaltechDATA_Logo_cropped.png" ',
        'alt="CaltechDATA Logo" width="249" height="69"> </center> ',
        "<p> Dear ",
        name,
        ", </p> ",
        '<p>Your CaltechDATA work "',
        title,
        '" has been cited in:</p>',
        citation_block,
        "<p>The citation(s) are now listed in your CaltechDATA record at ",
        '<a href="https://doi.org/',
        doi,
        '">',
        doi,
        "</a>.</p> ",
        "<p> Best, </p><p>CaltechDATA Alerting Service</p><hr> ",
        "<p> Is this incorrect? Let us know at ",
        '<a href="mailto:[email protected]?Subject=Issue%20with%20citation%20link%20between%20',
        doi,
        "%20and%20",
        ",".join(matched_dois),
        '">[email protected]</a></p> ',
        "<P> This email was sent by the Caltech Library, ",
        "1200 East California Blvd., MC 1-43, Pasadena, CA 91125, USA </p> </html>",
    ]
    return "".join(parts)


def send_simple_message(token, matched):
    """Email a record's contact person about newly matched citations.

    Args:
        token: Mailgun API key; sent as the password of the ``"api"`` user.
        matched: Two-item sequence. ``matched[0]`` is the CaltechDATA record
            id (appended to the record API URL and used as the key in the
            ``caltechdata.ds`` collection). ``matched[1]`` is a list of
            citing DOI URLs; each is fetched with an APA bibliography
            ``Accept`` header to obtain the citation text.

    Returns:
        The response returned by ``requests.post`` for the Mailgun call, or
        ``None`` when the record has no contact person with an email address
        (a message is printed in that case and nothing is sent).

    Raises:
        AssertionError: The record API answered with an error payload (a
            ``"message"`` key) or without a ``"metadata"`` key.
        SystemExit: Reading the record from ``caltechdata.ds`` failed.
    """
    matched_key = matched[0]
    matched_dois = matched[1]

    # Use raw api call to get email
    api_url = "https://data.caltech.edu/api/record/"
    r = requests.get(api_url + matched_key)
    r_data = r.json()
    if "message" in r_data:
        # r_data is a dict: read its keys (not attributes) and format instead
        # of concatenating, so the intended AssertionError is what is raised.
        status = r_data.get("status", r.status_code)
        raise AssertionError(
            f"id {matched_key} expected http status 200, got "
            f"{status} {r_data['message']}"
        )
    if "metadata" not in r_data:
        raise AssertionError(
            f"expected as metadata property in response, got {r_data!r}"
        )

    name, email = _find_contact_person(r_data["metadata"])
    if email == "":
        print("Missing email for record ", matched_key)
        return None

    # Use dataset version to get datacite metadata
    metadata, err = dataset.read("caltechdata.ds", matched_key)
    if err != "":
        print(f"Unexpected error on read: {err}")
        # Terminate with a failing status; the bare exit() helper comes from
        # the site module and reported success (status 0).
        raise SystemExit(1)
    title = metadata["titles"][0]["title"]
    doi = metadata["identifier"]["identifier"]

    headers = {"Accept": "text/bibliography;style=apa"}
    citations = []
    for cited_doi in matched_dois:
        response = requests.get(cited_doi, headers=headers)
        response.encoding = "utf-8"
        # The citation text arrives with XML entities; turn them back into
        # plain characters before embedding it in the message.
        citations.append("<p>" + su.unescape(response.text) + "</p>")
    citation_block = "".join(citations)

    # Send email; the context manager closes the logo file once the request
    # has been sent.
    with open("CaltechDATA_Logo_cropped.png", "rb") as logo:
        return requests.post(
            "https://api.mailgun.net/v3/notices.caltechlibrary.org/messages",
            auth=("api", token),
            files=[("inline", logo)],
            data={
                "from": "CaltechDATA Notices <[email protected]>",
                "to": name + " <" + email + ">, Tom Morrell <[email protected]>",
                "subject": "Your CaltechDATA Work has been cited!",
                "html": _build_notice_html(
                    name, title, doi, citation_block, matched_dois
                ),
            },
        )
if __name__ == "__main__":
    # Run everything from inside ./data so the relative paths used below
    # (e.g. "caltechdata.ds") resolve there. exist_ok=True creates the
    # directory only when it is missing, without a separate isdir() check.
    os.makedirs("data", exist_ok=True)
    os.chdir("data")

    # Refresh the local harvests before matching.
    get_crossref_refs("10.14291", done=False, new=False)
    get_crossref_refs("10.22002", done=True, new=False)
    get_caltechdata("caltechdata.ds")

    matches = match_cd_refs()
    for m in matches:
        # MAILTOK is read only when there is a match to report, so a run
        # that finds nothing does not need the variable to be set.
        token = os.environ["MAILTOK"]
        send_simple_message(token, m)