-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdvc.lock
352 lines (352 loc) · 12 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
schema: '2.0'
stages:
download_DevGPT:
cmd:
- dvc get-url --force https://zenodo.org/records/10086809/files/DevGPT.zip?download=1
- mkdir -p data/external
- unzip DevGPT.zip -d data/external/DevGPT
- rm DevGPT.zip
outs:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
clone_repos:
cmd: python scripts/data/download_repositories.py data/external/DevGPT/ /mnt/data/MSR_Challenge_2024/repositories/
data/repositories_download_status.json
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: scripts/data/download_repositories.py
hash: md5
md5: ecf6bc29c934494835f8117b203198c2
size: 7849
outs:
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
commit_agg:
cmd: python scripts/data/commit_sharings_to_agg.py data/external/DevGPT/ data/repositories_download_status.json
data/interim/
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/commit_sharings_to_agg.py
hash: md5
md5: 8528815d3f6dea73c0250f9109522b82
size: 5624
outs:
- path: data/interim/commit_sharings_df.csv
hash: md5
md5: dee35aeb0d0b7fa354b0022f9bd998a1
size: 484886
- path: data/interim/commit_sharings_groupby_repo_df.csv
hash: md5
md5: fb48677300a61d2877503ec897f0c4e5
size: 4994
commit_survival:
cmd: python scripts/data/compute_changes_survival.py data/interim/commit_sharings_df.csv
data/repositories_download_status.json data/interim/commit_sharings_changes_survival_df.csv
data/interim/commit_sharings_lines_survival_df.csv
deps:
- path: data/interim/commit_sharings_df.csv
hash: md5
md5: dee35aeb0d0b7fa354b0022f9bd998a1
size: 484886
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/compute_changes_survival.py
hash: md5
md5: 8ad8a68a376a824ab1fb6ba1eea85863
size: 23948
outs:
- path: data/interim/commit_sharings_changes_survival_df.csv
hash: md5
md5: 194afee4cff70543824bd2a73e3a1f1b
size: 532182
- path: data/interim/commit_sharings_lines_survival_df.csv
hash: md5
md5: 021b08469d02ccf69e409720bb12f75e
size: 63102513
repo_stats_git:
cmd: python scripts/data/compute_repository_statistics_git.py data/external/DevGPT/
data/repositories_download_status.json data/interim/repository_statistics_git.json
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/compute_repository_statistics_git.py
hash: md5
md5: 3a1ab9002d42ab9f1809764e8c4c1be1
size: 6566
outs:
- path: data/interim/repository_statistics_git.json
hash: md5
md5: a2d2d11ae1f4b6b10fcf3adec80cf08a
size: 303476
repo_stats_github:
cmd: python scripts/data/compute_repository_statistics_github.py data/external/DevGPT/
data/repositories_download_status.json data/interim/repository_statistics_github.json
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/compute_repository_statistics_github.py
hash: md5
md5: a68674ba5fa9a454f2d14c048f11bb49
size: 5164
outs:
- path: data/interim/repository_statistics_github.json
hash: md5
md5: 822db4d12c06eea2e76578f7a4f3fb91
size: 614770
pr_agg:
cmd: python scripts/data/pr_sharings_to_agg.py data/external/DevGPT/ data/repositories_download_status.json
data/interim/
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/pr_sharings_to_agg.py
hash: md5
md5: a0f9d5fae3236d03c1ccc124cc4c511a
size: 11648
outs:
- path: data/interim/pr_sharings_df.csv
hash: md5
md5: 343abf1b666b59e70e9f9ddd10762642
size: 771169
- path: data/interim/pr_sharings_groupby_repo_df.csv
hash: md5
md5: 5a9a8199defd70f663a4f534dfdfcc79
size: 16720
- path: data/interim/pr_sharings_split_commit_df.csv
hash: md5
md5: b61516427911535aa4df16aa63ae4480
size: 1208561
pr_survival:
cmd: python scripts/data/compute_changes_survival.py data/interim/pr_sharings_df.csv
data/repositories_download_status.json data/interim/pr_sharings_changes_survival_df.csv
data/interim/pr_sharings_lines_survival_df.csv
deps:
- path: data/interim/pr_sharings_df.csv
hash: md5
md5: 343abf1b666b59e70e9f9ddd10762642
size: 771169
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/compute_changes_survival.py
hash: md5
md5: 8ad8a68a376a824ab1fb6ba1eea85863
size: 23948
outs:
- path: data/interim/pr_sharings_changes_survival_df.csv
hash: md5
md5: 387450564b3b846886a5c046661c5d82
size: 595511
- path: data/interim/pr_sharings_lines_survival_df.csv
hash: md5
md5: fec947352a0452c506bd4b74eb6ffc28
size: 55564668
pr_split_survival:
cmd: python scripts/data/compute_changes_survival.py data/interim/pr_sharings_split_commit_df.csv
data/repositories_download_status.json data/interim/pr_sharings_split_commit_changes_survival_df.csv
data/interim/pr_sharings_split_commit_lines_survival_df.csv
deps:
- path: data/interim/pr_sharings_split_commit_df.csv
hash: md5
md5: b61516427911535aa4df16aa63ae4480
size: 1208561
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/compute_changes_survival.py
hash: md5
md5: 8ad8a68a376a824ab1fb6ba1eea85863
size: 23948
outs:
- path: data/interim/pr_sharings_split_commit_changes_survival_df.csv
hash: md5
md5: 5f6f413d380b722563537eebaa0ba4fa
size: 1320298
- path: data/interim/pr_sharings_split_commit_lines_survival_df.csv
hash: md5
md5: e6e8317b34dedf0839fc26def0424eb6
size: 161049373
issue_agg:
cmd: python scripts/data/issue_sharings_to_agg.py data/external/DevGPT/ data/repositories_download_status.json
data/interim/
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/issue_sharings_to_agg.py
hash: md5
md5: 762ebc067ad1d8c7c4d5abc608321947
size: 17028
outs:
- path: data/interim/issue_sharings_df.csv
hash: md5
md5: 3efdd8aa822b83dd02978dd323313e52
size: 718852
- path: data/interim/issue_sharings_groupby_repo_df.csv
hash: md5
md5: 4e08f0712c3675e8fd26d7ae1e141e45
size: 28098
issue_survival:
cmd: python scripts/data/compute_changes_survival.py data/interim/issue_sharings_df.csv
data/repositories_download_status.json data/interim/issue_sharings_changes_survival_df.csv
data/interim/issue_sharings_lines_survival_df.csv
deps:
- path: data/interim/issue_sharings_df.csv
hash: md5
md5: 3efdd8aa822b83dd02978dd323313e52
size: 718852
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/compute_changes_survival.py
hash: md5
md5: 8ad8a68a376a824ab1fb6ba1eea85863
size: 23948
outs:
- path: data/interim/issue_sharings_changes_survival_df.csv
hash: md5
md5: 71da5434e85ed2e9505486b3c939b9d1
size: 104116
- path: data/interim/issue_sharings_lines_survival_df.csv
hash: md5
md5: b2461f13359cfeb23ecc0170957af687
size: 25476881
commit_similarities:
cmd: python scripts/data/find_chatgpt_changes_similarities.py data/external/DevGPT/
data/interim/commit_sharings_df.csv data/repositories_download_status.json data/interim/commit_sharings_similarities_df.csv
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/interim/commit_sharings_df.csv
hash: md5
md5: dee35aeb0d0b7fa354b0022f9bd998a1
size: 484886
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/find_chatgpt_changes_similarities.py
hash: md5
md5: f03016677efbf785050bf58ef967389e
size: 18579
outs:
- path: data/interim/commit_sharings_similarities_df.checkpoint_data.json
hash: md5
md5: d0681a952d0715de7a5ad05c9fad7d1e
size: 6990471
- path: data/interim/commit_sharings_similarities_df.csv
hash: md5
md5: f8ed4db14670b3f53d12424b750155d7
size: 17490798
pr_similarities:
cmd: python scripts/data/find_chatgpt_changes_similarities.py data/external/DevGPT/
data/interim/pr_sharings_df.csv data/repositories_download_status.json data/interim/pr_mergesha_sharings_similarities_df.csv
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/interim/pr_sharings_df.csv
hash: md5
md5: 343abf1b666b59e70e9f9ddd10762642
size: 771169
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/find_chatgpt_changes_similarities.py
hash: md5
md5: f03016677efbf785050bf58ef967389e
size: 18579
outs:
- path: data/interim/pr_mergesha_sharings_similarities_df.checkpoint_data.json
hash: md5
md5: 38fa01197e48f529a3dbed4aa46dc653
size: 5704439
- path: data/interim/pr_mergesha_sharings_similarities_df.csv
hash: md5
md5: 035a37ddef5afd506f95fc3ee0de6acb
size: 7955642
issue_similarities:
cmd: python scripts/data/find_chatgpt_changes_similarities.py data/external/DevGPT/
data/interim/issue_sharings_df.csv data/repositories_download_status.json data/interim/issue_sharings_similarities_df.csv
deps:
- path: data/external/DevGPT/
hash: md5
md5: 223e6501a84a339b341bfb843595d1b2.dir
size: 4182300528
nfiles: 65
- path: data/interim/issue_sharings_df.csv
hash: md5
md5: 3efdd8aa822b83dd02978dd323313e52
size: 718852
- path: data/repositories_download_status.json
hash: md5
md5: 985cc0f559f68de003cea224e8a7bc4d
size: 177064
- path: scripts/data/find_chatgpt_changes_similarities.py
hash: md5
md5: f03016677efbf785050bf58ef967389e
size: 18579
outs:
- path: data/interim/issue_sharings_similarities_df.checkpoint_data.json
hash: md5
md5: 02eb0ad8237c23192f7db1eab5c1c150
size: 1014307
- path: data/interim/issue_sharings_similarities_df.csv
hash: md5
md5: e5de736e9621fef7af71e8d20ca57ea3
size: 1012465