suyashb95 · lashoun · Mar 3, 2020 · Mar 3, 2020 · Mar 3, 2020 · Mar 3, 2020
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 wiktionaryparser/notes.txt
 notes.txt
+tests.ipynb
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -110,4 +111,4 @@ venv.bak/
 # mypy
 .mypy_cache/
 .dmypy.json
-dmypy.json
+dmypy.json
diff --git a/readme.md b/readme.md
@@ -2,6 +2,7 @@
 
 A python project which parses word content from Wiktionary in an easy to use JSON format.
 Right now, it parses etymologies, definitions, pronunciations, examples, audio links and related words.
+Only English and French Wiktionaries are supported.
 
 [![Downloads](http://pepy.tech/badge/wiktionaryparser)](http://pepy.tech/project/wiktionaryparser)
 
@@ -54,6 +55,10 @@ Right now, it parses etymologies, definitions, pronunciations, examples, audio l
 >>> parser.set_default_language('french')
 >>> parser.exclude_part_of_speech('noun')
 >>> parser.include_relation('alternative forms')
+
+>>> parser_fr = WiktionaryParser(language="français")
+>>> word = parser_fr.fetch('test')
+>>> word = parser_fr.fetch('test', 'anglais')
 ```
 
 #### Requirements
@@ -67,4 +72,4 @@ If you want to add features/improvement or report issues, feel free to send a pu
 
 #### License
 
-Wiktionary Parser is licensed under [MIT](LICENSE.txt).
+Wiktionary Parser is licensed under [MIT](LICENSE.txt).
diff --git a/wiktionaryparser.py b/wiktionaryparser.py
@@ -1,24 +1,45 @@
-import re, requests
+import re
+import requests
 from utils import WordData, Definition, RelatedWord
 from bs4 import BeautifulSoup
 from itertools import zip_longest
 from copy import copy
 from string import digits
 
-PARTS_OF_SPEECH = [
-    "noun", "verb", "adjective", "adverb", "determiner",
-    "article", "preposition", "conjunction", "proper noun",
-    "letter", "character", "phrase", "proverb", "idiom",
-    "symbol", "syllable", "numeral", "initialism", "interjection",
-    "definitions", "pronoun", "particle", "predicative", "participle",
-    "suffix",
-]
+PARTS_OF_SPEECH = {  # parser language -> headings used as the 'definitions' checklist
+    "english": [
+        "noun", "verb", "adjective", "adverb", "determiner",
+        "article", "preposition", "conjunction", "proper noun",
+        "letter", "character", "phrase", "proverb", "idiom",
+        "symbol", "syllable", "numeral", "initialism", "interjection",
+        "definitions", "pronoun", "particle", "predicative", "participle",
+        "suffix"
+    ],
+    "français": [
+        "nom commun", "verbe", "adjectif", "adverbe", "déterminant",
+        "article", "préposition", "conjonction", "nom propre",  # accented, like the other entries
+        "lettre", "caractère", "expression", "proverbe", "idiome",
+        "symbole", "syllabe", "nombre", "acronyme", "interjection",
+        "définitions", "pronom", "particule", "prédicat", "participe",
+        "suffixe", "locution nominale"
+    ],
+}
+
+RELATIONS = {  # parser language -> related-word section headings
+    "english": [
+        "synonyms", "antonyms", "hypernyms", "hyponyms",
+        "meronyms", "holonyms", "troponyms", "related terms",
+        "coordinate terms",
+    ],
+    "français": [
+        "synonymes", "antonymes", "hyperonymes", "hypéronymes", "hyponymes",  # both spellings
+        "méronymes", "holonymes", "paronymes", "troponymes",
+        "vocabulaire apparenté par le sens", "dérivés",
+        "anagrammes", "proverbes et phrases toutes faites",
+        "apparentés étymologiques", "quasi-synonymes"
+    ]
+}
 
-RELATIONS = [
-    "synonyms", "antonyms", "hypernyms", "hyponyms",
-    "meronyms", "holonyms", "troponyms", "related terms",
-    "coordinate terms",
-]
 
 def is_subheading(child, parent):
     child_headings = child.split(".")
@@ -30,18 +51,29 @@ def is_subheading(child, parent):
             return False
     return True
 
+
 class WiktionaryParser(object):
-    def __init__(self):
-        self.url = "https://en.wiktionary.org/wiki/{}?printable=yes"
+    def __init__(self, language="english"):
         self.soup = None
         self.session = requests.Session()
-        self.session.mount("http://", requests.adapters.HTTPAdapter(max_retries = 2))
-        self.session.mount("https://", requests.adapters.HTTPAdapter(max_retries = 2))
-        self.language = 'english'
+        self.session.mount("http://", requests.adapters.HTTPAdapter(max_retries=2))
+        self.session.mount("https://", requests.adapters.HTTPAdapter(max_retries=2))
         self.current_word = None
-        self.PARTS_OF_SPEECH = copy(PARTS_OF_SPEECH)
-        self.RELATIONS = copy(RELATIONS)
-        self.INCLUDED_ITEMS = self.RELATIONS + self.PARTS_OF_SPEECH + ['etymology', 'pronunciation']
+
+        if language == "français":
+            self.language = 'français'
+            self.url = "https://fr.wiktionary.org/wiki/{}?printable=yes"
+            self.PARTS_OF_SPEECH = copy(PARTS_OF_SPEECH["français"])
+            self.RELATIONS = copy(RELATIONS["français"])
+            self.INCLUDED_ITEMS = self.RELATIONS + self.PARTS_OF_SPEECH + ['étymologie', 'prononciation']
+        else:
+            if language != "english":
+                print("language unsupported, switched to english")
+            self.language = 'english'
+            self.url = "https://en.wiktionary.org/wiki/{}?printable=yes"
+            self.PARTS_OF_SPEECH = copy(PARTS_OF_SPEECH["english"])
+            self.RELATIONS = copy(RELATIONS["english"])
+            self.INCLUDED_ITEMS = self.RELATIONS + self.PARTS_OF_SPEECH + ['etymology', 'pronunciation']
 
     def include_part_of_speech(self, part_of_speech):
         part_of_speech = part_of_speech.lower()
@@ -86,8 +118,12 @@ def count_digits(self, string):
     def get_id_list(self, contents, content_type):
         if content_type == 'etymologies':
             checklist = ['etymology']
+            if self.language == "français":
+                checklist = ['étymologie']
         elif content_type == 'pronunciation':
             checklist = ['pronunciation']
+            if self.language == "français":
+                checklist = ['prononciation']
         elif content_type == 'definitions':
             checklist = self.PARTS_OF_SPEECH
             if self.language == 'chinese':
@@ -192,7 +228,10 @@ def parse_examples(self, word_contents):
                 table = table.find_next_sibling()
             examples = []
             while table and table.name == 'ol':
-                for element in table.find_all('dd'):
+                example_delim = 'dd'
+                if self.language == "français":
+                    example_delim = 'i'
+                for element in table.find_all(example_delim):
                     example_text = re.sub(r'\([^)]*\)', '', element.text.strip())
                     if example_text:
                         examples.append(example_text)