-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate.py
150 lines (109 loc) · 4.81 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Copyright (c) 2014 Brad Neuman
# Updates (or initializes) the database and fills it in with missing commits from git. Can be insanely slow
# the first time
# Requires sqlite3
from gitBlameStats import *
from progressTracker import *
import argparse
import blameDBQuery as query
import os
import sqlite3
import subprocess
# Command-line interface: two optional boolean switches plus the mandatory
# repository path. The parsed result is kept in the module-level ``args``.
_switch = dict(action='store_true', default = False)

parser = argparse.ArgumentParser(
    description = "Update the blame database for the given repository")

# -r / --recursive: descend into git submodules as well.
parser.add_argument(
    '--recursive', '-r',
    help='Also update all submodules',
    **_switch)

# -n / --dry-run: report what would happen without writing anything.
parser.add_argument(
    '--dry-run', '-n',
    help='Dry-run (just print what would be done, don\'t do it',
    **_switch)

# Positional: where the repository lives on disk.
parser.add_argument('path', help="path to the repository to update")

args = parser.parse_args()
# Every repository that will be updated; starts with the one named on the
# command line and grows with submodules when --recursive is given.
repo_paths = set([args.path])

if args.recursive:
    print('searching for submodules...')

    def check_modules(repo):
        """Add the submodules of ``repo``, and recursively theirs, to ``repo_paths``.

        Submodule paths are read from ``<repo>/.gitmodules`` with ``git config``.
        A repository without a ``.gitmodules`` file, or whose file declares no
        submodule paths, contributes nothing.

        Args:
            repo: path to a git working tree.

        Raises:
            subprocess.CalledProcessError: if ``git config`` fails for any
                reason other than "no matching key".
        """
        modfile = os.path.join(repo, '.gitmodules')
        if not os.path.isfile(modfile):
            return

        # --null terminates every entry with NUL and separates key from value
        # with a newline, so names and paths containing spaces parse correctly.
        # The anchored pattern only matches `submodule.<name>.path` keys (an
        # unanchored `.*path` would also match e.g. `submodule.mypath.url`).
        cmd = ['git', '--no-pager', 'config', '--file', modfile, '--null',
               '--get-regexp', r'^submodule\..*\.path$']
        try:
            result = subprocess.check_output(cmd, universal_newlines=True)
        except subprocess.CalledProcessError as err:
            # git config exits with status 1 when no key matches; that just
            # means there are no submodules to follow.
            if err.returncode == 1:
                return
            raise

        for entry in result.split('\0'):
            _key, sep, sub_path = entry.partition('\n')
            if not sep or not sub_path:
                # trailing empty chunk after the final NUL, or a valueless key
                continue
            submodule = os.path.join(repo, sub_path)
            # Skip repositories already collected so nothing is walked twice.
            if submodule not in repo_paths:
                repo_paths.add(submodule)
                check_modules(submodule)

    check_modules(args.path)
# Both files are looked up relative to the current working directory.
db_filename = 'blame.db'
schema_filename = 'schema.sql'

# The schema only has to be applied once, by the first repository that opens
# a database file that did not exist when the script started.
db_is_new = not os.path.exists(db_filename)


def _record_revisions(conn, bs, repo_name, revs, first_order, label):
    """Store a commit row and a full blame snapshot for every revision in ``revs``.

    Each revision is written in a single transaction, so the database never
    holds a ``commits`` row whose ``full_blames`` rows are missing (which would
    make a later run treat that revision as already processed).

    Args:
        conn: open sqlite3 connection to the blame database.
        bs: BlameStats instance for the repository being processed.
        repo_name: repository name stored alongside every inserted row.
        revs: sequence of revision identifiers, in the order to be recorded.
        first_order: ordering number given to the first revision in ``revs``;
            subsequent revisions count up from it.
        label: text printed before each revision on the progress line, or
            None to print only the revision.
    """
    pt = ProgressTracker(len(revs))

    # Start from the blame state already stored for this repository.
    # presumably a dict of filename -> {author: line count} -- TODO confirm
    with conn:
        stats = query.GetLatestFullBlames(conn.cursor(), repo_name)

    for curr_order, rev in enumerate(revs, first_order):
        progress = pt.Update()
        if label is not None:
            print('%s %s %s' % (label, rev, progress))
        else:
            print('%s %s' % (rev, progress))

        # Do all the git work before opening the transaction, so a failure
        # here leaves the database untouched for this revision.
        commit_ts, commit_author = bs.GetCommitProperties(rev)
        newStats = bs.GetCommitStats(rev)

        # Files reported for this revision replace their previous entry; all
        # other files carry over unchanged from the preceding revision.
        for filename in newStats:
            stats[filename] = newStats[filename]

        # NOTE: files whose entry is empty are skipped; no zero-line marker
        # row is written for them and they are left in ``stats``.
        blame_rows = []
        for filename in stats:
            per_author = stats[filename]
            if per_author:
                for author in per_author:
                    blame_rows.append(
                        (rev, repo_name, filename, author, per_author[author]))

        # One transaction per revision: commit row and blame rows land
        # together, or not at all if anything raises.
        with conn:
            cur = conn.cursor()
            cur.execute('insert into commits values (?, ?, ?, ?, ?)',
                        (rev, repo_name, curr_order, commit_ts, commit_author))
            cur.executemany('insert into full_blames values (?, ?, ?, ?, ?)',
                            blame_rows)

    print(pt.Done())


for repo_path in repo_paths:
    # The repository name is the last path component.
    path_split = os.path.split(repo_path)
    if path_split[1] == '':
        # if there is a trailing slash, the last part might be empty, so go up one
        path_split = os.path.split(path_split[0])
    repo_name = path_split[1]

    print("Run update on repository at '%s'" % repo_path)

    bs = BlameStats(repo_path, debug = False)

    if db_is_new:
        print('Creating new blank databse')
        if args.dry_run:
            # A dry run must not create the database, and without one there
            # is nothing to query, so skip this repository entirely.
            continue

    conn = sqlite3.connect(db_filename)
    try:
        with conn:
            if db_is_new:
                with open(schema_filename, 'rt') as f:
                    conn.executescript(f.read())
                db_is_new = False

            # get the latest revision in the database
            row = query.GetLatestRevision(conn.cursor(), repo_name)
            latestRev = None
            lastOrder = 0
            if row and row[0]:
                latestRev = row[0]
                lastOrder = int(row[1])

            print("lastest revision for '%s' is '%s'" % (repo_name, latestRev))

        revs = bs.GetAllCommits(since=latestRev)
        print('have %d revisions to update' % len(revs))

        if not args.dry_run:
            # Prefix progress lines with the repository only when several
            # repositories are being processed.
            label = os.path.basename(repo_path) if len(repo_paths) > 1 else None
            _record_revisions(conn, bs, repo_name, revs, lastOrder + 1, label)
    finally:
        # Release the database handle even if the update fails part-way.
        conn.close()