From 59daaeef5988d039a18bcda26e7b40f2f4f7ea7f Mon Sep 17 00:00:00 2001
From: lashoun <lashoun+github@posteo.net>
Date: Tue, 3 Mar 2020 09:28:09 +0100
Subject: [PATCH 1/5] update gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 5d581a7..7929e76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 wiktionaryparser/notes.txt
 notes.txt
+tests.ipynb
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -110,4 +111,4 @@ venv.bak/
 # mypy
 .mypy_cache/
 .dmypy.json
-dmypy.json
\ No newline at end of file
+dmypy.json

From c911188b3943f862a59d9a8c016c89d769b2b2bb Mon Sep 17 00:00:00 2001
From: lashoun <lashoun+github@posteo.net>
Date: Tue, 3 Mar 2020 09:28:27 +0100
Subject: [PATCH 2/5] added hardcoded French support

---
 wiktionaryparser.py | 83 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 23 deletions(-)

diff --git a/wiktionaryparser.py b/wiktionaryparser.py
index bcc68d2..7cfd2ee 100644
--- a/wiktionaryparser.py
+++ b/wiktionaryparser.py
@@ -1,24 +1,45 @@
-import re, requests
+import re
+import requests
 from utils import WordData, Definition, RelatedWord
 from bs4 import BeautifulSoup
 from itertools import zip_longest
 from copy import copy
 from string import digits
 
-PARTS_OF_SPEECH = [
-    "noun", "verb", "adjective", "adverb", "determiner",
-    "article", "preposition", "conjunction", "proper noun",
-    "letter", "character", "phrase", "proverb", "idiom",
-    "symbol", "syllable", "numeral", "initialism", "interjection",
-    "definitions", "pronoun", "particle", "predicative", "participle",
-    "suffix",
-]
+PARTS_OF_SPEECH = {  # keyed by the `language` value WiktionaryParser is built with
+    "english": [
+        "noun", "verb", "adjective", "adverb", "determiner",
+        "article", "preposition", "conjunction", "proper noun",
+        "letter", "character", "phrase", "proverb", "idiom",
+        "symbol", "syllable", "numeral", "initialism", "interjection",
+        "definitions", "pronoun", "particle", "predicative", "participle",
+        "suffix",
+    ],
+    "français": [  # lower-case with accents; presumably compared verbatim to headings - verify in get_id_list
+        "nom commun", "verbe", "adjectif", "adverbe", "déterminant",
+        "article", "préposition", "conjonction", "nom propre",
+        "lettre", "caractère", "expression", "proverbe", "idiome",
+        "symbole", "syllabe", "nombre", "acronyme", "interjection",
+        "définitions", "pronom", "particule", "prédicat", "participe",
+        "suffixe", "locution nominale",
+    ],
+}
+
+RELATIONS = {  # keyed by the `language` value WiktionaryParser is built with
+    "english": [
+        "synonyms", "antonyms", "hypernyms", "hyponyms",
+        "meronyms", "holonyms", "troponyms", "related terms",
+        "coordinate terms",
+    ],
+    "français": [  # lower-case with accents; presumably compared verbatim to headings - verify
+        "synonymes", "antonymes", "hyperonymes", "hyponymes",
+        "méronymes", "holonymes", "paronymes", "troponymes",
+        "vocabulaire apparenté par le sens", "dérivés",
+        "anagrammes", "proverbes et phrases toutes faites",
+        "apparentés étymologiques", "quasi-synonymes",
+    ],
+}
 
-RELATIONS = [
-    "synonyms", "antonyms", "hypernyms", "hyponyms",
-    "meronyms", "holonyms", "troponyms", "related terms",
-    "coordinate terms",
-]
 
 def is_subheading(child, parent):
     child_headings = child.split(".")
@@ -30,18 +51,27 @@ def is_subheading(child, parent):
             return False
     return True
 
+
 class WiktionaryParser(object):
-    def __init__(self):
-        self.url = "https://en.wiktionary.org/wiki/{}?printable=yes"
+    def __init__(self, language="français"):
         self.soup = None
         self.session = requests.Session()
-        self.session.mount("http://", requests.adapters.HTTPAdapter(max_retries = 2))
-        self.session.mount("https://", requests.adapters.HTTPAdapter(max_retries = 2))
-        self.language = 'english'
+        self.session.mount("http://", requests.adapters.HTTPAdapter(max_retries=2))
+        self.session.mount("https://", requests.adapters.HTTPAdapter(max_retries=2))
         self.current_word = None
-        self.PARTS_OF_SPEECH = copy(PARTS_OF_SPEECH)
-        self.RELATIONS = copy(RELATIONS)
-        self.INCLUDED_ITEMS = self.RELATIONS + self.PARTS_OF_SPEECH + ['etymology', 'pronunciation']
+
+        if language == "français":
+            self.language = 'français'
+            self.url = "https://fr.wiktionary.org/wiki/{}?printable=yes"
+            self.PARTS_OF_SPEECH = copy(PARTS_OF_SPEECH["français"])
+            self.RELATIONS = copy(RELATIONS["français"])
+            self.INCLUDED_ITEMS = self.RELATIONS + self.PARTS_OF_SPEECH + ['étymologie', 'prononciation']
+        else:
+            self.language = 'english'
+            self.url = "https://en.wiktionary.org/wiki/{}?printable=yes"
+            self.PARTS_OF_SPEECH = copy(PARTS_OF_SPEECH["english"])
+            self.RELATIONS = copy(RELATIONS["english"])
+            self.INCLUDED_ITEMS = self.RELATIONS + self.PARTS_OF_SPEECH + ['etymology', 'pronunciation']
 
     def include_part_of_speech(self, part_of_speech):
         part_of_speech = part_of_speech.lower()
@@ -86,8 +116,12 @@ def count_digits(self, string):
     def get_id_list(self, contents, content_type):
         if content_type == 'etymologies':
             checklist = ['etymology']
+            if self.language == "français":
+                checklist = ['étymologie']
         elif content_type == 'pronunciation':
             checklist = ['pronunciation']
+            if self.language == "français":
+                checklist = ['prononciation']
         elif content_type == 'definitions':
             checklist = self.PARTS_OF_SPEECH
             if self.language == 'chinese':
@@ -192,7 +226,10 @@ def parse_examples(self, word_contents):
                 table = table.find_next_sibling()
             examples = []
             while table and table.name == 'ol':
-                for element in table.find_all('dd'):
+                example_delim = 'dd'
+                if self.language == "français":
+                    example_delim = 'i'
+                for element in table.find_all(example_delim):
                     example_text = re.sub(r'\([^)]*\)', '', element.text.strip())
                     if example_text:
                         examples.append(example_text)

From f2035992a05f23ba39ed0c98d397683e60301ae7 Mon Sep 17 00:00:00 2001
From: lashoun <lashoun+github@posteo.net>
Date: Tue, 3 Mar 2020 09:31:39 +0100
Subject: [PATCH 3/5] update readme

---
 readme.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 8905e96..cdac2f2 100644
--- a/readme.md
+++ b/readme.md
@@ -2,6 +2,7 @@
 
 A python project which parses word content from Wiktionary in an easy to use JSON format.
 Right now, it parses etymologies, definitions, pronunciations, examples, audio links and related words.
+Only English and French Wiktionaries are supported.
 
 [![Downloads](http://pepy.tech/badge/wiktionaryparser)](http://pepy.tech/project/wiktionaryparser)
 
@@ -54,6 +55,10 @@ Right now, it parses etymologies, definitions, pronunciations, examples, audio l
 >>> parser.set_default_language('french')
 >>> parser.exclude_part_of_speech('noun')
 >>> parser.include_relation('alternative forms')
+>>> 
+>>> parser_fr = WiktionaryParser(language="français")
+>>> word = parser_fr.fetch('test')
+>>> word = parser_fr.fetch('test', 'anglais')
 ```
 
 #### Requirements
@@ -67,4 +72,4 @@ If you want to add features/improvement or report issues, feel free to send a pu
 
 #### License
 
-Wiktionary Parser is licensed under [MIT](LICENSE.txt).
\ No newline at end of file
+Wiktionary Parser is licensed under [MIT](LICENSE.txt).

From b2c21e108a0dc11ad761596608d55c111c3c878c Mon Sep 17 00:00:00 2001
From: lashoun <lashoun+github@posteo.net>
Date: Tue, 3 Mar 2020 09:36:17 +0100
Subject: [PATCH 4/5] made English the default language

---
 wiktionaryparser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/wiktionaryparser.py b/wiktionaryparser.py
index 7cfd2ee..7badcec 100644
--- a/wiktionaryparser.py
+++ b/wiktionaryparser.py
@@ -53,7 +53,7 @@ def is_subheading(child, parent):
 
 
 class WiktionaryParser(object):
-    def __init__(self, language="français"):
+    def __init__(self, language="english"):
         self.soup = None
         self.session = requests.Session()
         self.session.mount("http://", requests.adapters.HTTPAdapter(max_retries=2))
@@ -67,6 +67,8 @@ def __init__(self, language="français"):
             self.RELATIONS = copy(RELATIONS["français"])
             self.INCLUDED_ITEMS = self.RELATIONS + self.PARTS_OF_SPEECH + ['étymologie', 'prononciation']
         else:
+            if language != "english":
+                print("language unsupported, switched to english")
             self.language = 'english'
             self.url = "https://en.wiktionary.org/wiki/{}?printable=yes"
             self.PARTS_OF_SPEECH = copy(PARTS_OF_SPEECH["english"])

From 1497d7debbd27783bbe9996529f1a53f2f2de988 Mon Sep 17 00:00:00 2001
From: lashoun <lashoun+github@posteo.net>
Date: Tue, 3 Mar 2020 09:37:27 +0100
Subject: [PATCH 5/5] fixed readme

---
 readme.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/readme.md b/readme.md
index cdac2f2..9c2eb1d 100644
--- a/readme.md
+++ b/readme.md
@@ -55,7 +55,7 @@ Only English and French Wiktionaries are supported.
 >>> parser.set_default_language('french')
 >>> parser.exclude_part_of_speech('noun')
 >>> parser.include_relation('alternative forms')
->>> 
+
 >>> parser_fr = WiktionaryParser(language="français")
 >>> word = parser_fr.fetch('test')
 >>> word = parser_fr.fetch('test', 'anglais')
@@ -72,4 +72,4 @@ If you want to add features/improvement or report issues, feel free to send a pu
 
 #### License
 
-Wiktionary Parser is licensed under [MIT](LICENSE.txt).
+Wiktionary Parser is licensed under [MIT](LICENSE.txt).