# LDA_and_Federalists

import numpy as np
import lda 
import nltk

def read_files_into_string(filenames):
  """Read several ``federalist_<name>.txt`` files into one string.

  Each entry of ``filenames`` is interpolated into the path
  ``federalist_{filename}.txt`` (resolved against the current working
  directory). Every file is read in full, and the texts are joined with a
  single newline, in the order given. An empty ``filenames`` yields ``''``.
  """
  def _slurp(name):
    # Each handle is closed before the next file is opened.
    with open(f'federalist_{name}.txt') as handle:
      return handle.read()

  return '\n'.join([_slurp(name) for name in filenames])

# Map each author label to the concatenated text of that author's papers.
# NOTE(review): `papers` is not defined in the visible part of this script; it
# must already exist here as a mapping whose values are iterables of the
# identifiers used in the `federalist_<id>.txt` file names.
federalist_by_author = {}

for author, files in papers.items():
  federalist_by_author[author] = read_files_into_string(files)


# Build the corpus from the raw texts. str() on the dict would return its repr,
# in which every newline is escaped to a literal backslash-n, so the split
# below would never match and the whole corpus would collapse into a single
# document (with dict punctuation and "n"-prefixed tokens in the vocabulary).
text = '\n'.join(federalist_by_author.values())
# or, for a single author: text = federalist_by_author["Madison"]

# Five consecutive newlines are treated as the boundary between documents.
document_split = text.split('\n\n\n\n\n')

from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(document_split)
# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back only on versions that predate get_feature_names_out().
# list() keeps `vocabulary` a plain list, as it was before.
if hasattr(vectorizer, 'get_feature_names_out'):
  vocabulary = list(vectorizer.get_feature_names_out())
else:
  vocabulary = vectorizer.get_feature_names()

import lda
model = lda.LDA(n_topics=10, n_iter=1000, random_state=1)
model.fit(X)

topic_word = model.topic_word_
n_top_words = 50

# Converted once, outside the loop, so argsort results can index it directly.
vocabulary_array = np.array(vocabulary)

for i, topic_distribution in enumerate(topic_word):
  # argsort is ascending; the reversed slice keeps the n_top_words entries
  # with the largest values, highest first.
  topic_words = vocabulary_array[np.argsort(topic_distribution)][:-(n_top_words+1):-1]
  print('topic {}: {}'.format(i, ' '.join(topic_words)))
  print()


# Not referenced anywhere in the visible part of the script.
# NOTE(review): presumably these match the keys of `papers` - confirm.
authors = ("Hamilton", "Madison", "Disputed", "Jay", "Shared")