import csv import config import grequests import re from lxml import html from lazyme.string import color_print from tqdm import tqdm def importData(): with open(config.data, 'rb') as f: reader = csv.reader(f) return list(reader) def getSymbol(data): print "Getting Symbol ..." symbolsWithHeader = map(lambda x: x[0], data) symbols = symbolsWithHeader[ 2: len(symbolsWithHeader) ] return map(lambda x: {'symbol': x}, symbols) def toNakedShareNumber(word): return float(re.sub("\D", "", word)) def toNakedPriceNumber(word): word = word.replace(',', '') return float(word) def toNakedFinanceNumber(word): return float(word.replace(',','')) def getProfileUrl(data): return config.url['profile'](data['symbol']) def getPriceUrl(data): return config.url['highlights'](data['symbol']) def getFinanceUrl(data): return config.url['finance'](data['symbol']) def scrapeAndFormatShare(page): if(page == None): return '-1' tree = html.fromstring(page.content) share = tree.xpath('//div[text()="Listed Share"]/following-sibling::div/text()') share = '0' if not share else share[0] return toNakedShareNumber(share) def getShare(datas): print "Getting Share ..." urls = map(getProfileUrl, datas) rs = (grequests.get(u) for u in urls) pages = tqdm(grequests.map(rs)) shares = map(scrapeAndFormatShare, pages) for i in range(len(datas)): datas[i].update({'share': shares[i]}) return datas def scrapeAndFormatPrice(page): if(page == None): return '-1' tree = html.fromstring(page.content) prices = tree.xpath('//td[text()="Last Price(Baht)"]/following-sibling::td/text()') try: price = prices[len(prices) - 1] except: price = '0' return toNakedPriceNumber(price) def getPrice(datas): print "Getting Price ..." urls = map(getPriceUrl, datas) rs = (grequests.get(u) for u in urls) pages = tqdm(grequests.map(rs)) prices = map(scrapeAndFormatPrice, pages) for i in range(len(datas)): datas[i].update({'price': prices[i]}) return datas def scrapeAndFormatFinance(page): if(page == None): return {} t = html.fromstring(page.content).xpath finance = { "cash": t('//td[text()="CASH AND CASH EQUIVALENTS"]/following-sibling::td/text()'), "investment": t('//td[text()="SHORT-TERM INVESTMENTS"]/following-sibling::td/text()'), "recievable": t('//td[text()="TRADE ACCOUNTS AND OTHER RECEIVABLE"]/following-sibling::td/text()'), "asset": t('//td[text()="TOTAL CURRENT ASSETS"]/following-sibling::td/text()'), "inventory": t('//td[text()="INVENTORIES"]/following-sibling::td/text()'), "liability": t('//td[text()="TOTAL LIABILITIES"]/following-sibling::td/text()') } finance = dict(zip(finance, map(lambda x: 0 if not x else x[0], finance.values()))) finance = dict(zip(finance, map(lambda x: 0 if x==0 else toNakedFinanceNumber(x), finance.values()))) finance = dict(zip(finance, map(lambda x: x*1000000, finance.values()))) return finance def getFinance(datas): print "Getting Finance ..." urls = map(getFinanceUrl, datas) rs = (grequests.get(u) for u in urls) pages = tqdm(grequests.map(rs)) finances = map(scrapeAndFormatFinance, pages) for i in range(len(datas)): datas[i].update(finances[i]) return datas def calculateNCAV(data): try: return (data['asset'] - data['liability'])/data['share'] except: return 'N/A' def getNCAV(datas): print "Getting NCAV ..." ncav = map(calculateNCAV, datas) for i in range(len(datas)): datas[i].update({'ncav': ncav[i]}) return datas def calculateNNWC(data): try: return ((data['cash'] + data['investment']) +\ (0.75 * data['recievable']) +\ (0.5 * data['inventory']) -\ (data['liability']))/data['share'] except: return 'N/A' def getNNWC(datas): print "Getting NNWC ..." nnwc = map(calculateNNWC, datas) for i in range(len(datas)): datas[i].update({'nnwc': nnwc[i]}) return datas def calculatePercent(data): try: nnwc = str(round((data['price']/data['nnwc'])*100, 2)) except: nnwc = -1 try: ncav = str(round((data['price']/data['ncav'])*100, 2)) except: ncav = -1 return { 'symbol': data['symbol'], 'nnwc_percent': 'N/A' if data['nnwc'] < 0 else nnwc, 'ncav_percent': 'N/A' if data['ncav'] < 0 else ncav } def displayEach(data): print "=================" print data['symbol'] if(data['nnwc_percent'] == 'N/A'): pass elif(data['nnwc_percent'] >= 0 and data['nnwc_percent'] < 70): color_print("nnwc " + data['nnwc_percent'] + "%", color='green') elif(data['nnwc_percent'] >= 70 and data['nnwc_percent'] < 100): color_print("nnwc " + data['nnwc_percent'] + "%", color='yellow') elif(data['nnwc_percent'] >= 100): color_print("nnwc " + data['nnwc_percent'] + "%", color='red') if(data['ncav_percent'] == 'N/A'): pass elif(data['ncav_percent'] >= 0 and data['ncav_percent'] < 70): color_print("ncav " + data['ncav_percent'] + "%", color='green') elif(data['ncav_percent'] >= 70 and data['ncav_percent'] < 100): color_print("ncav " + data['ncav_percent'] + "%", color='yellow') elif(data['ncav_percent'] >= 100): color_print("ncav " + data['ncav_percent'] + "%", color='red') def getPercent(datas): percents = map(calculatePercent, datas) for i in range(len(datas)): datas[i].update(percents [i]) return datas def display(datas): map(displayEach, datas) def main(): datas = importData() datas = getSymbol(datas) datas = getShare(datas) datas = getPrice(datas) datas = getFinance(datas) datas = getNCAV(datas) datas = getNNWC(datas) datas = getPercent(datas) toCSV(datas) def toCSV(datas): keys = datas[0].keys() with open('final.csv', 'wb') as output_file: dict_writer = csv.DictWriter(output_file, keys) dict_writer.writeheader() dict_writer.writerows(datas) main()