sheerun/githubsocial

View on GitHub
bin/extract-repos.py

Summary

Maintainability
A
0 mins
Test Coverage
from bsonstream import KeyValueBSONInput
import sys
from sys import argv
import gzip
from bson.json_util import dumps

# Fields that hold an embedded document; each one present in a record is
# replaced by a flat ``<field>_id`` entry carrying only the embedded 'id'.
_EMBEDDED_FIELDS = ('organization', 'parent', 'source')


def simplify_repo(dict_data):
    """Flatten one repository record in place and return it.

    The following changes are applied to ``dict_data``:

    * every key ending in ``'url'`` is removed;
    * ``'_id'`` and ``'permissions'`` are removed when present;
    * ``'owner'`` is replaced by ``'owner_id'`` (the owner's ``'id'``);
    * each of ``'organization'``, ``'parent'`` and ``'source'`` that is
      present is replaced by ``'<field>_id'`` in the same way.

    Args:
        dict_data: mutable mapping describing one record. It must contain
            an ``'owner'`` entry that itself has an ``'id'``.

    Returns:
        The same ``dict_data`` object, after modification.

    Raises:
        KeyError: if ``'owner'`` is missing, or if ``'owner'`` or one of
            the embedded fields has no ``'id'``.
    """
    # Iterate over a snapshot of the keys: deleting entries while walking
    # the live dict raises RuntimeError on Python 3.
    for key in list(dict_data):
        if key.endswith('url'):
            del dict_data[key]

    # Both fields are only being discarded, so their absence is not an error.
    dict_data.pop('_id', None)
    dict_data.pop('permissions', None)

    dict_data['owner_id'] = dict_data.pop('owner')['id']

    for field in _EMBEDDED_FIELDS:
        if field in dict_data:
            dict_data[field + '_id'] = dict_data.pop(field)['id']

    return dict_data


def main():
    """Print each record read from stdin as one simplified JSON line.

    Records are whatever ``KeyValueBSONInput`` yields for ``sys.stdin``.
    NOTE(review): ``fast_string_prematch="github"`` presumably pre-filters
    documents on that substring -- confirm against bsonstream.
    """
    stream = KeyValueBSONInput(fh=sys.stdin, fast_string_prematch="github")
    for dict_data in stream:
        # Parenthesised single-argument form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(dumps(simplify_repo(dict_data)))


if __name__ == '__main__':
    main()