Skip to content

Commit

Permalink
labels
Browse files Browse the repository at this point in the history
  • Loading branch information
Ash-Crow committed Nov 25, 2015
1 parent d698690 commit 82c8c06
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 16 deletions.
22 changes: 7 additions & 15 deletions sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,16 @@
# Point the wrapper at the Wikidata Query Service SPARQL endpoint.
sparql = SPARQLWrapper("https://query.wikidata.org/bigdata/namespace/wdq/sparql")
# NOTE(review): the query text below reads like two queries run together (an
# episode listing followed by a label lookup on wd:Q18615489) -- presumably the
# removed and added lines of a diff shown without +/- markers. Confirm against
# the repository before relying on it.
sparql.setQuery("""
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT ?episode ?episodeLabel ?number ?date WHERE {
?episode wdt:P31 wd:Q838795 .
?episode wdt:P361 wd:Q13915 .
?episode wdt:P433 ?number .
?episode wdt:P577 ?date
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
} ORDER BY xsd:integer(?number)
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?l WHERE {
wd:Q18615489 rdfs:label ?l .
}
""")
# Request JSON; the converted result is indexed below as
# results["results"]["bindings"].
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Collect the "xml:lang" tag of every ?l binding that comes back.
langs = []
for result in results["results"]["bindings"]:
# NOTE(review): this print reads ?episode / ?episodeLabel while the append
# below reads ?l -- presumably one line is the old loop body and the other the
# new one; verify which is current.
print(result["episode"]["value"], result["episodeLabel"]["value"])
langs.append(result["l"]["xml:lang"])

print(langs)
36 changes: 36 additions & 0 deletions xkcd-labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from SPARQLWrapper import SPARQLWrapper, JSON

# Language codes for which every strip is expected to carry a label; any code
# in this list that a strip lacks is reported by the script.
all_langs = [
    'af', 'an', 'ast', 'bar', 'bm', 'br', 'ca', 'co', 'cs', 'cy', 'da', 'de',
    'de-at', 'de-ch', 'en', 'en-ca', 'en-gb', 'eo', 'es', 'et', 'eu', 'fi',
    'fr', 'frc', 'frp', 'fur', 'ga', 'gd', 'gl', 'gsw', 'hr', 'hu', 'ia',
    'id', 'ie', 'io', 'it', 'jam', 'kab', 'kg', 'lb', 'li', 'lij', 'lt', 'lv',
    'mg', 'min', 'ms', 'nap', 'nb', 'nds', 'nds-nl', 'nl', 'nn', 'nrm', 'oc',
    'pap', 'pcd', 'pl', 'pms', 'prg', 'pt', 'pt-br', 'rgn', 'rm', 'ro', 'sc',
    'scn', 'sco', 'sk', 'sr-el', 'sv', 'sw', 'tr', 'vec', 'vi', 'vls', 'vmf',
    'vo', 'wa', 'wo', 'zu', 'fo', 'is', 'kl',
]

ENDPOINT = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"

# The literal SPARQL braces are doubled so that str.format() substitutes only
# the strip-number placeholder; with single braces format() treats the whole
# WHERE clause as a replacement field and raises before any query is sent.
QUERY_TEMPLATE = """
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?episode ?number ?title WHERE {{
?episode wdt:P31 wd:Q838795 .
?episode wdt:P361 wd:Q13915 .
?episode wdt:P433 '{}' .
?episode rdfs:label ?title .
}}
"""


def build_query(number):
    """Return the SPARQL query listing every label of strip *number*."""
    return QUERY_TEMPLATE.format(number)


def fetch_bindings(number):
    """Run the label query for strip *number* and return its result bindings.

    Performs one HTTP request against ENDPOINT per call.
    """
    sparql = SPARQLWrapper(ENDPOINT)
    sparql.setQuery(build_query(number))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return results["results"]["bindings"]


def missing_label_rows(bindings, langs=None):
    """Return one ``(item, 'L' + code, title)`` row per missing language.

    *bindings* is the list found under ``["results"]["bindings"]`` of a query
    built by ``build_query``. *langs* is the iterable of expected language
    codes and defaults to ``all_langs``.

    The item id and the title are taken from the last binding, as the
    original script did. An empty *bindings* list yields no rows, so a strip
    number without a matching item can neither crash the run nor inherit the
    previous strip's item and title. Rows follow the order of *langs*.
    """
    if not bindings:
        return []
    if langs is None:
        langs = all_langs

    used_langs = {result["title"]["xml:lang"] for result in bindings}
    last = bindings[-1]
    episode = last['episode']['value'].rsplit('/', 1)[-1]
    title = last["title"]['value']

    # The row must name the *missing* language, not the last one seen.
    return [
        (episode, 'L' + lang, title)
        for lang in dict.fromkeys(langs)  # de-duplicate, keep order
        if lang not in used_langs
    ]


def main(first=1, last=1608):
    """Print the missing-label rows for strips *first* to *last* inclusive."""
    for number in range(first, last + 1):
        for row in missing_label_rows(fetch_bindings(number)):
            print(*row)


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion xkcd-scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def ordinal(value):
episodenumber = urlbit.replace("/","")

if int(episodenumber) > latest_imported_episode:
descriptions = "strip de xkcd n° " + episodenumber + ", Folge des Webcomics xkcd, " + ordinal(episodenumber) + " strip of the webcomic xkcd"
descriptions = "strip de xkcd n°" + episodenumber + ", Folge des Webcomics xkcd, " + ordinal(episodenumber) + " strip of the webcomic xkcd"
#date = a.attrs.get('title') or ""
date = "+0000000" + '-'.join(["{0:0>2}".format(v) for v in a.attrs.get('title').split("-")]) + "T00:00:00Z/11" or ""

Expand Down

0 comments on commit 82c8c06

Please sign in to comment.