#r'c:\program files\bozzy\bozzycompare\'
#File name: bozzycompare.py
#BozzyCompare, supplement to Bozzy, both by Mirabai Knight
#Final working version: January 15, 2007
"""List the words of an external dictionary that a personal dictionary lacks.

Reads ``edic.txt`` (external dictionary, one word per line) and ``pdic.rtf``
(personal dictionary; only DigitalCAT rtf/cre is currently supported) from the
BozzyCompare folder.  Every edic word that does not occur in pdic is written,
one word per line, to ``newlist.txt``, which is then renamed to
``bcMMDDYYHHMMSS.txt`` (a relative name, so it lands in the current working
directory).
"""

import os
import time
import re

# The script only ever used the built-in ``set`` type.  The legacy ``sets``
# import is kept where the module exists, but tolerated where it does not
# (it was removed in Python 3).
try:
    import sets
except ImportError:
    sets = None

# Folder holding edic.txt and pdic.rtf; newlist.txt is created here as well.
BOZZY_DIR = r'c:\program files\bozzy\bozzycompare'

# Boilerplate that DigitalCAT puts at the top of an rtf/cre export.  Each
# fragment is removed before the generic markup stripping, in this order.
# NOTE(review): as in the original script, a fragment is removed wherever it
# occurs in the file, not only in the header (so "Inc." is also removed from
# dictionary entries) -- confirm this is acceptable.
HEADER_FRAGMENTS = (
    r'{\rtf1\ansi{\*\cxrev100}\cxdict{\*\cxsystem',
    r'}\deff0{\fonttbl {\f0\fmodern Courier New;}}{\stylesheet{\s0 Normal;}'
    r'{\s1 Question; }{\s2 Answer;}{\s3 Colloquy;}}{\*\cxtranopt\numbar}',
    r'Digital CAT 00190 RTF by Stenovations,',
    r'Inc.',
)


def _loose_whitespace_pattern(fragment):
    """Compile *fragment* as a literal whose whitespace runs match any whitespace.

    The header may be wrapped across lines in the exported file, so a space
    in the fragment has to match a line break just as well as a space.
    """
    pieces = fragment.split()
    return re.compile(r'\s+'.join(re.escape(piece) for piece in pieces))


_HEADER_PATTERNS = [_loose_whitespace_pattern(f) for f in HEADER_FRAGMENTS]

# One brace group on a single line ('.' does not match a newline).
_BRACE_GROUP = re.compile(r'\{.*?\}')


def strip_rtf_markup(text):
    """Return *text* with the rtf/cre markup removed, leaving one word per line.

    The known header fragments are removed first.  Then every '*' and
    backslash is deleted, every single-line brace group is dropped, any
    leftover '}' is deleted, and leading whitespace is trimmed.
    """
    for pattern in _HEADER_PATTERNS:
        text = pattern.sub('', text)
    text = text.replace('*', '').replace('\\', '')
    text = _BRACE_GROUP.sub('', text)
    text = text.replace('}', '')
    return text.lstrip()


def _lines(text):
    """Split *text* on newlines and drop any carriage return left at line ends."""
    return [line.rstrip('\r') for line in text.split('\n')]


def find_new_words(edic_text, pdic_text):
    """Return the edic words that do not appear in pdic.

    Both arguments are plain text with one word per line.  Lines are compared
    without their line endings, so a missing newline at the end of either
    text cannot hide or fake a match.  Blank lines are skipped, duplicates are
    reported once, and the result keeps the order in which words first appear
    in *edic_text*.
    """
    known = set(_lines(pdic_text))
    emitted = set()
    fresh = []
    for word in _lines(edic_text):
        if not word or word in known or word in emitted:
            continue
        emitted.add(word)
        fresh.append(word)
    return fresh


# try/finally rather than ``with`` so the helpers still run on the old
# Python 2 interpreters this script was written for.
def _read_text(path):
    """Return the whole content of the text file at *path*."""
    handle = open(path, 'r')
    try:
        return handle.read()
    finally:
        handle.close()


def _write_text(path, text):
    """Create or overwrite the text file at *path* with *text*."""
    handle = open(path, 'w')
    try:
        handle.write(text)
    finally:
        handle.close()


def main(base_dir=BOZZY_DIR):
    """Compare edic.txt with pdic.rtf in *base_dir* and write the new-word list.

    The list is written to ``newlist.txt`` inside *base_dir* (words separated
    by newlines, no trailing newline) and then renamed to a timestamped
    ``bc*.txt`` file.  The input files are only read, never modified.
    """
    edic_text = _read_text(os.path.join(base_dir, 'edic.txt'))
    pdic_text = strip_rtf_markup(_read_text(os.path.join(base_dir, 'pdic.rtf')))

    # Formats the list so it's Bozzy-compatible, one word per line.
    newlist_path = os.path.join(base_dir, 'newlist.txt')
    _write_text(newlist_path, '\n'.join(find_new_words(edic_text, pdic_text)))

    # Writes newlist to a unique filename.
    os.rename(newlist_path, 'bc' + time.strftime('%m%d%y%H%M%S') + '.txt')
    print('Done')


if __name__ == '__main__':
    main()