csurfer · calixo888 · Oct 22, 2019 · Oct 22, 2019
diff --git a/rake_nltk/rake.py b/rake_nltk/rake.py
@@ -27,6 +27,7 @@ class Rake(object):
 
     def __init__(
         self,
+        text,
         stopwords=None,
         punctuations=None,
         language="english",
@@ -44,11 +45,12 @@ def __init__(
         :param min_length: Minimum limit on the number of words in a phrase
                            (Inclusive. Defaults to 1)
         """
+
         # By default use degree to frequency ratio as the metric.
         if isinstance(ranking_metric, Metric):
-            self.metric = ranking_metric
+            self.__metric = ranking_metric
         else:
-            self.metric = Metric.DEGREE_TO_FREQUENCY_RATIO
+            self.__metric = Metric.DEGREE_TO_FREQUENCY_RATIO
 
         # If stopwords not provided we use language stopwords by default.
         self.stopwords = stopwords
@@ -61,27 +63,41 @@ def __init__(
             self.punctuations = string.punctuation
 
         # All things which act as sentence breaks during keyword extraction.
-        self.to_ignore = set(chain(self.stopwords, self.punctuations))
+        self.__to_ignore = set(chain(self.stopwords, self.punctuations))
 
-        # Assign min or max length to the attributes
-        self.min_length = min_length
-        self.max_length = max_length
+        self.__min_length = min_length
+        self.__max_length = max_length
 
         # Stuff to be extracted from the provided text.
         self.frequency_dist = None
-        self.degree = None
+        self.word_degrees = None
         self.rank_list = None
         self.ranked_phrases = None
 
-    def extract_keywords_from_text(self, text):
-        """Method to extract keywords from the text provided.
+        # Initializing the text and building all the fields
+        self.set_text(text)
 
-        :param text: Text to extract keywords from, provided as a string.
-        """
+        # Results are exposed as plain attributes instead of getter methods;
+        # read them directly off the Rake object:
+        # - self.ranked_phrases
+        # - self.rank_list
+        # - self.frequency_dist
+        # - self.word_degrees
+
+    def set_text(self, text):  # Store `text` and run keyword extraction on it.
+        self.text = text  # Kept so the other setters can re-run extraction later.
         sentences = nltk.tokenize.sent_tokenize(text)
-        self.extract_keywords_from_sentences(sentences)
+        self._extract_keywords_from_sentences(sentences)  # Takes the sentence list.
+
+    def set_stopwords(self, stopwords):  # Replace the stopwords and re-extract keywords.
+        self.stopwords, self.__to_ignore = stopwords, set(chain(stopwords, self.punctuations))
+        self.set_text(self.text)  # Re-run extraction now that the break set is rebuilt.
 
-    def extract_keywords_from_sentences(self, sentences):
+    def set_punctuations(self, punctuations):  # Replace the punctuations and re-extract.
+        self.punctuations, self.__to_ignore = punctuations, set(chain(self.stopwords, punctuations))
+        self.set_text(self.text)  # Re-run extraction now that the break set is rebuilt.
+
+    def _extract_keywords_from_sentences(self, sentences):
         """Method to extract keywords from the list of sentences provided.
 
         :param sentences: Text to extraxt keywords from, provided as a list
@@ -92,38 +108,6 @@ def extract_keywords_from_sentences(self, sentences):
         self._build_word_co_occurance_graph(phrase_list)
         self._build_ranklist(phrase_list)
 
-    def get_ranked_phrases(self):
-        """Method to fetch ranked keyword strings.
-
-        :return: List of strings where each string represents an extracted
-                 keyword string.
-        """
-        return self.ranked_phrases
-
-    def get_ranked_phrases_with_scores(self):
-        """Method to fetch ranked keyword strings along with their scores.
-
-        :return: List of tuples where each tuple is formed of an extracted
-                 keyword string and its score. Ex: (5.68, 'Four Scoures')
-        """
-        return self.rank_list
-
-    def get_word_frequency_distribution(self):
-        """Method to fetch the word frequency distribution in the given text.
-
-        :return: Dictionary (defaultdict) of the format `word -> frequency`.
-        """
-        return self.frequency_dist
-
-    def get_word_degrees(self):
-        """Method to fetch the degree of words in the given text. Degree can be
-        defined as sum of co-occurances of the word with other words in the
-        given text.
-
-        :return: Dictionary (defaultdict) of the format `word -> degree`.
-        """
-        return self.degree
-
     def _build_frequency_dist(self, phrase_list):
         """Builds frequency distribution of the words in the given body of text.
 
@@ -148,9 +132,9 @@ def _build_word_co_occurance_graph(self, phrase_list):
             # use in other creative ways if required later.
             for (word, coword) in product(phrase, phrase):
                 co_occurance_graph[word][coword] += 1
-        self.degree = defaultdict(lambda: 0)
+        self.word_degrees = defaultdict(lambda: 0)
         for key in co_occurance_graph:
-            self.degree[key] = sum(co_occurance_graph[key].values())
+            self.word_degrees[key] = sum(co_occurance_graph[key].values())
 
     def _build_ranklist(self, phrase_list):
         """Method to rank each contender phrase using the formula
@@ -165,10 +149,10 @@ def _build_ranklist(self, phrase_list):
         for phrase in phrase_list:
             rank = 0.0
             for word in phrase:
-                if self.metric == Metric.DEGREE_TO_FREQUENCY_RATIO:
-                    rank += 1.0 * self.degree[word] / self.frequency_dist[word]
-                elif self.metric == Metric.WORD_DEGREE:
-                    rank += 1.0 * self.degree[word]
+                if self.__metric == Metric.DEGREE_TO_FREQUENCY_RATIO:
+                    rank += 1.0 * self.word_degrees[word] / self.frequency_dist[word]
+                elif self.__metric == Metric.WORD_DEGREE:
+                    rank += 1.0 * self.word_degrees[word]
                 else:
                     rank += 1.0 * self.frequency_dist[word]
             self.rank_list.append((rank, " ".join(phrase)))
@@ -213,10 +197,10 @@ def _get_phrase_list_from_words(self, word_list):
         :return: List of contender phrases that are formed after dropping
                  stopwords and punctuations.
         """
-        groups = groupby(word_list, lambda x: x not in self.to_ignore)
+        groups = groupby(word_list, lambda x: x not in self.__to_ignore)
         phrases = [tuple(group[1]) for group in groups if group[0]]
         return list(
             filter(
-                lambda x: self.min_length <= len(x) <= self.max_length, phrases
+                lambda x: self.__min_length <= len(x) <= self.__max_length, phrases
             )
         )