# Allison Obourn
# CSC 110, Spring 2018
# Lecture 34

# Reads all of the words in stateoftheunion.txt. Outputs every word that
# occurs more than 20 times along with its count. Calculates the number
# of positive words and the number of negative words in the text to
# judge its overall sentiment.

# runs the whole analysis: counts the words in the text, prints the
# frequent ones, then prints the total, positive and negative word counts
def main():
    words = count_words()

    # outputs every word that occurs more than 20 times, with its count
    for word, count in words.items():
        if count > 20:
            print(word, count)

    positives = make_set("positive_words.txt")
    negatives = make_set("negative_words.txt")

    pos_words = count_word_set(words, positives)
    neg_words = count_word_set(words, negatives)

    # words holds one entry per distinct word, so the total number of
    # words read is the sum of the counts, not len(words). This keeps the
    # total comparable with the positive/negative occurrence counts below.
    print(sum(words.values()), "total words")
    print(pos_words, "positive words")
    print(neg_words, "negatives words")

# takes a dictionary mapping words to their counts and a collection of
# words as parameters. Adds up the counts of every word in the
# collection that also appears in the dictionary and returns that sum
# (0 when no word matches)
def count_word_set(words, positives):
    return sum(words[entry] for entry in positives if entry in words)

# reads all of the words from a file, removes punctuation, converts them
# all to lowercase and then returns a dictionary mapping
# these words to their counts
# file_name: path of the text file to read; defaults to
#            "stateoftheunion.txt" so existing calls keep working
def count_words(file_name="stateoftheunion.txt"):
    # the with block closes the file even if reading fails
    with open(file_name) as file:
        text = file.read()

    # one pass over the text: a hyphen becomes a space so hyphenated
    # words split apart; the other punctuation marks are deleted
    cleanup = str.maketrans("-", " ", ';.!?",')
    tokens = text.translate(cleanup).lower().split()

    words = {}
    for word in tokens:
        words[word] = words.get(word, 0) + 1
    return words

# takes a filename as a parameter
# returns a set containing all of the whitespace-separated words in the
# file, exactly as they are written (no case or punctuation changes)
def make_set(file_name):
    # the with block closes the file as soon as it has been read
    with open(file_name) as file:
        return set(file.read().split())

# only run the program when this file is executed directly, so that
# importing it to reuse its functions does not read files or print
if __name__ == "__main__":
    main()
