# scripts/install.py
# install.py
import csv
import os
# Absolute path of the raw data folder: <folder of this file>/../rawdata/data
data_fldr = os.path.abspath(
    os.sep.join([os.path.dirname(os.path.abspath(__file__)), '..', 'rawdata', 'data'])
)
# Index file written by rebuild_index(); it lives inside the data folder.
ndx_file = os.sep.join([data_fldr, '_index.ndx'])
def main():
    """
    Entry point of the install script.

    Prints a short banner (including the folder-structure TODO notice),
    rebuilds the data index via rebuild_index() and prints 'Done'.
    """
    banner = (
        'install.py',
        'Script to setup local data',
        'folders, connections to databases, etc.\n',
        '[TODO] Setup Local folder structure...',
    )
    for message in banner:
        print(message)
    rebuild_index()
    print('Done')
def rebuild_index(data_dir=None, index_file=None):
    """
    Scan the data folder tree and write a lookup of all tables / column names.

    Every file below 'data_dir' whose name ends in '.csv' or '.txt'
    (case-insensitive) is passed to get_index_terms(); the collected terms
    are written to 'index_file', one per line, replacing any previous
    content of that file.

    Args:
        data_dir: root folder to scan. Defaults to the module-level
            data_fldr (the rawdata/data folder).
        index_file: path of the index file to write. Defaults to the
            module-level ndx_file.
    """
    if data_dir is None:
        data_dir = data_fldr
    if index_file is None:
        index_file = ndx_file

    print('Building indexes...')
    print(data_dir)

    ndx = []
    for root, dirs, files in os.walk(data_dir):
        # os.walk yields entries in arbitrary order; sorting both lists makes
        # the generated index reproducible between runs and machines.
        dirs.sort()
        for f in sorted(files):
            # Require a real '.csv' / '.txt' suffix so that names such as
            # 'datacsv' (no dot) are not mistaken for data files.
            if f.lower().endswith(('.csv', '.txt')):
                ndx.extend(get_index_terms(os.path.join(root, f)))

    with open(index_file, 'w') as fio:
        fio.writelines(term + '\n' for term in ndx)
def get_index_terms(fname):
    """
    Read the header of the file 'fname' and return its index terms.

    Each term has the form <table>.<column>, e.g.
    finance.transactions.transaction_type

    <table> is built from the path components that follow the first folder
    named 'data' in 'fname', joined with '.', with the file extension
    removed. If the path has no 'data' folder, the bare file name (without
    extension) is used instead.

    <column> is one entry of the first line of the file, parsed as a
    comma-separated record; surrounding whitespace and double quotes are
    removed.

    Args:
        fname: path of a comma-separated text file.

    Returns:
        A list of '<table>.<column>' strings, one per header column; an
        empty list if the file is empty or its first line is blank.
    """
    parts = fname.split(os.sep)
    # Only the folders below the first 'data' component take part in the name.
    below_data = parts[parts.index('data') + 1:] if 'data' in parts else []
    name_parts = below_data or parts[-1:]
    # Drop the extension whatever its length (the index only wants the stem).
    name_parts[-1] = os.path.splitext(name_parts[-1])[0]
    root_name = '.'.join(name_parts)

    # Read the header line only and add its column names to the list of terms.
    with open(fname, 'r') as f:
        hdr = f.readline()

    # csv.reader copes with quoted names and quoted commas; a blank or missing
    # header parses to an empty record, which yields no terms.
    cols = next(csv.reader([hdr], skipinitialspace=True), [])
    # The extra strip calls remove stray padding and unbalanced quotes that a
    # sloppy header may still contain after parsing.
    return [root_name + '.' + col.strip().strip('"').strip() for col in cols]
# Script entry point: runs the installer when this file is executed.
# NOTE(review): the call is not guarded by `if __name__ == '__main__':`, so
# importing this module also runs main() - confirm whether that is intended.
main()