我的第一个 Python 程序,纪念一下。程序统计字典文件中以每个字母开头的单词数量。字典文件选的是 Ubuntu 系统中美式英语和英式英语的常用单词列表(/usr/share/dict/american-english 和 /usr/share/dict/british-english)。结果显示以 s 开头的单词最多,有 11000 多个,以 c 开头的也不少,约 9500 个。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
def getCountList(path):
    """Count the words of a word-list file by their first letter.

    The file is read line by line and every line is treated as one word.
    A word is counted when its first character, lower-cased, is an ASCII
    letter ``a``-``z``. Any other line is reported on standard output and
    left out of every count.

    Args:
        path: Path of the word-list file to read.

    Returns:
        A list of ``(key, count)`` tuples sorted by count, largest first.
        Keys are lower-case letters plus ``'*'``, whose count is the total
        number of counted words. Letters that never occur are absent.

    Raises:
        Whatever ``open`` raises when the file cannot be opened.
    """
    counts = {}
    total = 0
    # ``with`` guarantees the file is closed, even if reading fails part-way.
    with open(path, 'r') as word_file:
        # Iterate the file directly instead of loading every line up front.
        for line in word_file:
            # Slicing (not indexing) stays safe for an empty string, and the
            # length check rejects characters whose lower-case form expands
            # to more than one code point.
            letter = line[:1].lower()
            if len(letter) == 1 and 'a' <= letter <= 'z':
                counts[letter] = counts.get(letter, 0) + 1
                total += 1
            else:
                # Drop the line ending so the message is not followed by a
                # spurious blank line. Single-argument print(...) behaves the
                # same on Python 2 and Python 3.
                print('该单词不能识别 %s' % line.rstrip('\r\n'))
    # The grand total is stored under '*' so it sorts alongside the letters.
    counts['*'] = total
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)
def main():
    """Print first-letter word counts for the American and British word lists.

    Each list comes from ``getCountList`` and is already sorted by count,
    largest first. The two lists are printed side by side, one row per rank.
    When one list has fewer entries than the other (for example because no
    word starts with some letter), the missing cells are shown as ``-`` with
    a count of 0 instead of raising ``IndexError``.
    """
    american_counts = getCountList('/usr/share/dict/american-english')
    british_counts = getCountList('/usr/share/dict/british-english')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(' American   British ')
    print('=====================')
    placeholder = ('-', 0)
    # Derive the row count from the data rather than assuming 26 letters + '*'.
    row_count = max(len(american_counts), len(british_counts))
    for i in range(row_count):
        american = american_counts[i] if i < len(american_counts) else placeholder
        british = british_counts[i] if i < len(british_counts) else placeholder
        print(' %s %6d | %s %6d' % (american[0], american[1], british[0], british[1]))

# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

输出结果

 American   British 
=====================
 *  98553 | *  98310
 s  11072 | s  11035
 c   9507 | c   9478
 p   7539 | p   7513
 b   6095 | b   6087
 m   5952 | m   5958
 a   5902 | a   5884
 d   5811 | d   5807
 r   5285 | r   5270
 t   5002 | t   4996
 f   4075 | f   4057
 h   3856 | h   3855
 e   3813 | e   3796
 i   3613 | i   3598
 g   3452 | g   3436
 l   3437 | l   3425
 w   2711 | w   2706
 o   2240 | o   2238
 n   2015 | n   2011
 u   1899 | u   1895
 v   1584 | v   1582
 j   1259 | j   1253
 k   1247 | k   1250
 q    464 | q    461
 y    380 | y    376
 z    287 | z    287
 x     56 | x     56