# Allison Obourn
# CSC 110, Spring 2018
# Lecture 33

# Reads all of the words in Moby Dick. Outputs 50 of those words and
# their counts. Calculates the number of positive words and the number
# of negative words in the book to judge its overall sentiment. 

def main():
    """Print a sample of word counts and the book's sentiment totals.

    Counts the words in the book via count_words(), prints up to 50 of
    them with their counts, then prints how many distinct words the book
    contains and how many of those distinct words appear in the positive
    and negative word lists.
    """
    words = count_words()

    # Outputs the counts of up to 50 words. A snapshot of the items is
    # sliced instead of calling popitem() so that the dictionary is left
    # intact (the totals below then cover every word) and so that a book
    # with fewer than 50 distinct words does not raise KeyError.
    for word, count in list(words.items())[:50]:
        print(word, count)

    positives = make_set("positive_words.txt")
    negatives = make_set("negative_words.txt")

    # A dictionary itself does not support &, but its key view does, so
    # intersect the keys with each sentiment set. These are distinct
    # words, not total occurrences.
    pos_words = words.keys() & positives
    neg_words = words.keys() & negatives

    print(len(words), "total words")
    print(len(pos_words), "positive words")
    print(len(neg_words), "negative words")

# reads all of the words from a file, removes punctuation, converts them
# all to lowercase and then returns a dictionary mapping
# these words to their counts
def count_words(file_name="mobydick.txt"):
    """Return a dictionary mapping each word in a file to its count.

    The text is split on whitespace after being lowercased. Hyphens are
    treated as word separators, and the characters ; . ! ? " , are
    removed before splitting.

    Parameters:
        file_name: path of the text file to read. Defaults to
            "mobydick.txt", the file this function always read before
            the parameter existed.

    Returns:
        A dict mapping each word (str) to the number of times it occurs
        (int). An empty file produces an empty dict.

    Raises:
        OSError: if the file cannot be opened.
    """
    # the with block closes the file even if reading fails
    with open(file_name) as book:
        text = book.read()

    # a single pass over the text: "-" becomes a space and the listed
    # punctuation characters are deleted
    cleanup = str.maketrans("-", " ", ';.!?",')

    words = {}
    for word in text.translate(cleanup).lower().split():
        words[word] = words.get(word, 0) + 1
    return words

# takes a filename as a parameter
# returns a set containing all of the words in the file
def make_set(file_name):
    """Return a set of all the whitespace-separated words in a file.

    Words are kept exactly as they appear in the file; no case or
    punctuation changes are applied.

    Parameters:
        file_name: path of the text file to read.

    Returns:
        A set of the distinct words (str) in the file. An empty file
        produces an empty set.

    Raises:
        OSError: if the file cannot be opened.
    """
    # the with block closes the file even if reading fails
    with open(file_name) as word_file:
        return set(word_file.read().split())

# Run the program only when this file is executed as a script, so that
# importing it (for example to reuse its functions) does not read the
# book or print anything.
if __name__ == "__main__":
    main()