""" Print the frequency of the (most frequent) words in a named text file. Author: Rodney Topor Last updated: 6 August 2011 """ from re import split frequencies = {} # word->frequency dictionary def count(filename,frequency=False,number=0): """ Print the frequency of the words in filename. Print in alphabetical order or frequency order depending on frequency. If printing by frequency and number > 0, print only the number most frequent words. """ # make dictionary empty frequencies.clear() # read filename try: # add words in each line to dictionary for line in open(filename, "r"): process(line) except IOError: # basic error checking print 'IO Error in %s' % filename return if frequency: # print the word frequencies by value pairs = [(count,word) for (word,count) in frequencies.iteritems()] pairs.sort() # standard list processing function pairs.reverse() # ditto if number > 0: pairs = pairs[:number] for count, word in pairs: print "%s: %d" % (word, count) else: # order by key # print the word frequencies alphabetically pairs = frequencies.items() # list of (key,value) pairs in dictionary pairs.sort() for word, count in pairs: print "%s: %d" % (word, count) def process(line): """ Add the number of occurrences of each word in line to the dictionary. """ # split line into (alphanumeric) words # (\w=alphanumeric character, \W=nonalphanumeric character) line = line.strip() # remove leading and trailing white space words = split('\W+', line) for word in words: if word != "": word = word.lower() # increment count for word in dictionary if frequencies.has_key(word): frequencies[word] = frequencies[word] + 1 else: frequencies[word] = 1