-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_file_change.py
88 lines (73 loc) · 2.89 KB
/
check_file_change.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pandas as pd
import subprocess
import os
# Presumably the directory that holds the repository checkouts (inferred from
# the name and path only -- confirm against the code that uses it).
repo_storage = "/drive1/phatnt/zTrans/data/repos"
def read_file_in_commit(repo_dir, rev_path, commit_hash):
    """Return the content of ``rev_path`` as recorded in ``commit_hash``.

    The repository is first appended to git's global ``safe.directory`` list
    and then ``git show <commit_hash>:<rev_path>`` is run inside ``repo_dir``.

    Args:
        repo_dir: Path of the git working tree to query.
        rev_path: Path of the file, relative to the repository root.
        commit_hash: Revision from which the file is read.

    Returns:
        The file content decoded as UTF-8, or ``None`` when any git command
        fails (unknown revision, missing file, missing repository, or git
        not being available).

    Raises:
        UnicodeDecodeError: if the stored blob is not valid UTF-8.
    """
    repo_dir = str(repo_dir)
    try:
        # Arguments are passed as lists (no shell), so paths containing
        # spaces or shell metacharacters are handed to git verbatim instead
        # of being interpreted by /bin/sh.
        subprocess.run(
            ["git", "config", "--global", "--add", "safe.directory", repo_dir],
            check=True,
        )
        raw = subprocess.check_output(
            ["git", "-C", repo_dir, "show", f"{commit_hash}:{rev_path}"]
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError covers a missing git executable, which the former
        # shell-based invocation also reported as a failed command.
        return None
    return raw.decode("utf-8")
def get_info_from_git_diff(repo_dir, commit1, commit2, file1, file2):
    """Return the similarity index git reports for the rename file1 -> file2.

    The repository is first appended to git's global ``safe.directory`` list,
    then ``git diff --name-status <commit1> <commit2>`` is run inside
    ``repo_dir`` and its rename entries (status ``R<score>``) are searched
    for the pair ``(file1, file2)``.

    Args:
        repo_dir: Path of the git working tree to query.
        commit1: Revision on the old side of the diff.
        commit2: Revision on the new side of the diff.
        file1: Path of the file before the rename.
        file2: Path of the file after the rename.

    Returns:
        The integer that follows ``R`` in the status column of the matching
        rename entry.

    Raises:
        subprocess.CalledProcessError: if a git command exits non-zero.
        Exception: if the diff lists no rename from ``file1`` to ``file2``.
    """
    repo_dir = str(repo_dir)
    # Arguments are passed as lists (no shell), so revisions and paths are
    # handed to git verbatim instead of being interpreted by /bin/sh.
    subprocess.run(
        ["git", "config", "--global", "--add", "safe.directory", repo_dir],
        check=True,
    )
    output = subprocess.check_output(
        ["git", "-C", repo_dir, "diff", "--name-status", commit1, commit2]
    )
    for line in output.decode("utf-8").splitlines():
        if not line.startswith("R"):
            continue
        # Rename entries have three tab-separated fields:
        # "R<score>", old path, new path.
        mode, old, new = line.split("\t")
        if old == file1 and new == file2:
            return int(mode[1:])
    # No rename entry matched the requested pair.
    raise Exception("Something wrong")
# ==============================================================================
# Summarise, over every row of the migrations CSV, how many Java files were
# added, deleted, modified or renamed, and how many rows contain such changes.
df = pd.read_csv("data/migrations_36_file.csv")

# Row-level counters.
num_has_java_diff_file = 0
num_has_modified_file = 0

# File-level counters.
num_java_file_added = 0
num_java_file_deleted = 0
num_java_file_modified = 0
num_java_file_renamed_modified = 0
num_java_file_renamed_unchanged = 0

for _, row in df.iterrows():
    # NOTE(review): eval() executes whatever expression a cell contains. If
    # the cells are plain list literals, ast.literal_eval would parse them
    # without running code -- consider switching once `ast` is imported.
    added = eval(row["java_added"])
    # Previously read from "java_added", which made the deleted count a copy
    # of the added count. Assumes the CSV has a "java_deleted" column that
    # parallels the other java_* columns -- confirm against the data file.
    deleted = eval(row["java_deleted"])
    modified = eval(row["java_modified"])
    renamed_modified = eval(row["java_renamed_modified"])
    renamed_unchanged = eval(row["java_renamed_unchanged"])

    if added or deleted or modified or renamed_modified or renamed_unchanged:
        num_has_java_diff_file += 1
    if modified or renamed_modified:
        num_has_modified_file += 1

    num_java_file_added += len(added)
    num_java_file_deleted += len(deleted)
    num_java_file_modified += len(modified)
    num_java_file_renamed_modified += len(renamed_modified)
    num_java_file_renamed_unchanged += len(renamed_unchanged)

print("Number of Java files added: ", num_java_file_added)
print("Number of Java files deleted: ", num_java_file_deleted)
print("Number of Java files modified: ", num_java_file_modified)
print("Number of Java files renamed and modified: ", num_java_file_renamed_modified)
print("Number of Java files renamed and unchanged: ", num_java_file_renamed_unchanged)
print("Number of commits that have Java file changes: ", num_has_java_diff_file)
print("Number of commits that have modified Java files: ", num_has_modified_file)
print(
    "Num diff java files: ",
    num_java_file_added
    + num_java_file_deleted
    + num_java_file_modified
    + num_java_file_renamed_unchanged
    + num_java_file_renamed_modified,
)