import os import requests setName = 'ths' nameStart = '' oracleStart = '

' oracleEnd = '

' def normalizeName(name): return name.lower().replace(',','').replace("'","").replace(' ', '_') def normalizeOracle(oracle): return oracle.replace(u'\u2014', '-').replace(u'\u2018', "'") r = requests.get('http://magiccards.info/query?v=spoiler&s=issue&q=++e:%s/en' % setName) spl = r.text.split(nameStart) spl.pop(0) # Get rid of all of the html that comes before our first card for s in spl: # Extract name and oracle from magiccards.info name = s[1 + s.find(">"):s.find("")] oracle = s[len(oracleStart)+s.find(oracleStart):s.find(oracleEnd)].replace('

', '\\n') norm = normalizeName(name) # Open relative cardsfolder path = os.path.join('..','res','cardsfolder', norm[0], norm+'.txt') hasOracle = False try: with open(path, 'r') as f: for line in f.readlines(): hasOracle |= line.startswith("Oracle:") if not hasOracle: with open(path, "a") as f: f.write('\n') f.write(normalizeOracle(oracle)) print '+ ', norm else: print '= ', norm except: print '? ', norm