Add login reminders
[gerrit.git] / resources / com / google / gerrit / pgm / init / generate-comment-diff.py
1 #!/usr/bin/env python3
2
3 # If you modify this file, please pass it through:
4 #
5 #   flake8 --ignore=E501,W503 generate-comment-diff.py
6 #   black generate-comment-diff.py
7 #
8 # And test with:
9 #
10 #   ./generate-comment-diff-test.sh
11
12 import json
13 import requests
14 import sys
15 import urllib.parse
16 import textwrap
17 from collections import OrderedDict
18
19
class Project:
    """A project hosted on the server, described only by its name."""

    def __init__(self, name):
        self._name = name

    @classmethod
    def from_raw(cls, name, raw):
        """Build a Project from one entry of the server's project listing.

        Only ``name`` is kept; ``raw`` is accepted for symmetry with the other
        ``from_raw`` constructors but is not read.
        """
        return cls(name)

    @property
    def name(self):
        """The name this project was constructed with."""
        return self._name

    def __repr__(self):
        return str(self)

    def __str__(self):
        return f"<Project {self.name}>"
37
38
class Change:
    """A change, reduced to the fields this script needs."""

    @classmethod
    def from_raw(cls, raw):
        """Build a Change from the dict the server returns for one change.

        Reads the "_number", "subject" and "project" keys of ``raw``; a
        missing key raises KeyError.
        """
        instance = cls()
        instance._number, instance._subject, instance._project = (
            raw["_number"],
            raw["subject"],
            raw["project"],
        )
        return instance

    @property
    def number(self):
        """Value of the "_number" field."""
        return self._number

    @property
    def subject(self):
        """Value of the "subject" field."""
        return self._subject

    @property
    def project(self):
        """Value of the "project" field."""
        return self._project

    def __repr__(self):
        return str(self)

    def __str__(self):
        return f"<Change {self.number}>"
67
68
class Account:
    """A user account, as embedded in messages and comments."""

    @property
    def name(self):
        """Full name of the user, or None when the server did not send one."""
        return self._name

    @property
    def id(self):
        """Numeric account id (the "_account_id" field)."""
        return self._id

    @classmethod
    def from_raw(cls, raw):
        """Build an Account from the dict the server returns for an account.

        "_account_id" is required (KeyError if missing).  "name" is optional
        in Gerrit's AccountInfo, e.g. for users that never set a full name, so
        it is read leniently and defaults to None instead of raising.
        """
        account = cls()

        account._name = raw.get("name")
        account._id = raw["_account_id"]

        return account
86
87
class Message:
    """A message posted on a change."""

    @classmethod
    def from_raw(cls, raw):
        """Build a Message from the dict the server returns for one message.

        Reads the "author" (converted with Account.from_raw), "date" and
        "message" keys of ``raw``; a missing key raises KeyError.
        """
        instance = cls()
        instance._author, instance._date, instance._message = (
            Account.from_raw(raw["author"]),
            raw["date"],
            raw["message"],
        )
        return instance

    @property
    def author(self):
        """The Account that wrote the message."""
        return self._author

    @property
    def date(self):
        """Value of the "date" field, kept exactly as received."""
        return self._date

    @property
    def message(self):
        """Text of the message."""
        return self._message

    def __repr__(self):
        return str(self)

    def __str__(self):
        return f"<Message by {self.author.name} at {self.date}>"
116
117
class Range:
    """The span of lines a range comment applies to."""

    @classmethod
    def from_raw(cls, raw):
        """Build a Range from a dict with "start_line" and "end_line" keys."""
        instance = cls()
        instance._start_line, instance._end_line = (
            raw["start_line"],
            raw["end_line"],
        )
        return instance

    @property
    def start_line(self):
        """Value of the "start_line" field."""
        return self._start_line

    @property
    def end_line(self):
        """Value of the "end_line" field."""
        return self._end_line
135
136
class Comment:
    """An inline or file-level review comment."""

    @classmethod
    def from_raw(cls, raw, path=None):
        """Build a Comment from the dict the server returns for one comment.

        ``path`` overrides the file path; when it is None the path is taken
        from ``raw["path"]``.  "side" defaults to "REVISION", "line" to None
        (no line recorded), and the range is None unless ``raw`` has a
        "range" entry.
        """
        instance = cls()

        instance._author = Account.from_raw(raw["author"])
        instance._date = raw["updated"]
        instance._message = raw["message"]
        instance._path = raw["path"] if path is None else path
        instance._side = raw.get("side", "REVISION")
        instance._line = raw.get("line")
        instance._range = Range.from_raw(raw["range"]) if "range" in raw else None

        return instance

    @property
    def author(self):
        """The Account that wrote the comment."""
        return self._author

    @property
    def date(self):
        """Value of the "updated" field, kept exactly as received."""
        return self._date

    @property
    def message(self):
        """Text of the comment."""
        return self._message

    @property
    def path(self):
        """Path of the file the comment is attached to."""
        return self._path

    @property
    def side(self):
        """Side of the diff the comment is on: "PARENT" or "REVISION"."""
        return self._side

    @property
    def line(self):
        """Line the comment is attached to, or None if none was recorded."""
        return self._line

    @property
    def range(self):
        """The Range of a range comment, or None for other comments."""
        return self._range

    def __repr__(self):
        return str(self)

    def __str__(self):
        return f"<Comment by {self.author.name} at {self.date}>"
195
196
class Diff:
    """The diff of one file, as returned by the server."""

    @classmethod
    def from_raw(cls, raw):
        """Build a Diff from the dict the server returns for a file diff.

        "content" is required.  The paths come from the "name" entry of
        "meta_a" / "meta_b"; a side whose meta entry is absent gets None.
        """
        instance = cls()

        instance._content = raw["content"]
        instance._path_a = raw["meta_a"]["name"] if "meta_a" in raw else None
        instance._path_b = raw["meta_b"]["name"] if "meta_b" in raw else None

        return instance

    @property
    def content(self):
        """The list of diff chunks, exactly as received."""
        return self._content

    @property
    def path_a(self):
        """File name on side A, or None when there is no "meta_a"."""
        return self._path_a

    @property
    def path_b(self):
        """File name on side B, or None when there is no "meta_b"."""
        return self._path_b
227
228
class Server:
    """Minimal read-only client for the REST API of a Gerrit server."""

    # Gerrit starts every JSON response body with this magic line to protect
    # against cross-site script inclusion; it must be removed before parsing.
    _XSSI_PREFIX = ")]}'"

    def __init__(self, base_addr):
        """Remember ``base_addr``, the server URL without a trailing slash."""
        self._base_addr = base_addr

    @staticmethod
    def _change_id(change):
        """Return the "<project>~<number>" identifier of ``change`` for URLs.

        The project name is percent-encoded (including "/"), otherwise a
        project such as "plugins/foo" would add a spurious path segment.
        """
        return "{}~{}".format(
            urllib.parse.quote(change.project, safe=""), change.number
        )

    def _json_query(self, path):
        """GET ``path`` relative to the base address and return the parsed JSON.

        Raises requests.HTTPError on a non-success HTTP status, rather than
        failing later with a confusing JSON parse error on the error text.
        """
        url = "{}/{}".format(self._base_addr, path)
        response = requests.get(url)
        response.raise_for_status()

        text = response.text
        if text.startswith(self._XSSI_PREFIX):
            # The newline left after the prefix is ignored by json.loads.
            text = text[len(self._XSSI_PREFIX):]

        return json.loads(text)

    @property
    def base_addr(self):
        """The base address this server was created with."""
        return self._base_addr

    def get_projects(self):
        """Return a list with one Project per entry of the project listing."""
        raw = self._json_query("projects/")

        return [Project.from_raw(name, proj_raw) for name, proj_raw in raw.items()]

    def get_change(self, change_number):
        """Return the Change numbered ``change_number``, or None if not found."""
        raw = self._json_query("changes/?q=change:{}".format(change_number))
        if not raw:
            return None

        return Change.from_raw(raw[0])

    def get_changes(self):
        """Return the changes listed by the server's default change query."""
        raw = self._json_query("changes/")

        return [Change.from_raw(change_raw) for change_raw in raw]

    def get_change_messages(self, change):
        """Return the messages users posted on ``change``.

        Messages written by the Gerrit system itself carry no "author" entry.
        They cannot have review comments attached and Message.from_raw needs
        an author, so they are left out.
        """
        raw = self._json_query("changes/{}/messages".format(self._change_id(change)))

        return [
            Message.from_raw(message_raw)
            for message_raw in raw
            if "author" in message_raw
        ]

    def get_change_message_comments(self, change, message_filter):
        """Return the comments of ``change`` posted along with ``message_filter``.

        A comment belongs to the message when it has the same author id and
        its "updated" timestamp equals the message date.

        The result is a dict keyed by revision (patch set number).  Each value
        is an OrderedDict keyed by file path holding the list of Comment
        objects for that revision and path.
        """
        raw = self._json_query("changes/{}/comments".format(self._change_id(change)))

        comments_by_revision = {}

        for path, comment_raw_list in raw.items():
            for comment_raw in comment_raw_list:
                if message_filter.author.id != comment_raw["author"]["_account_id"]:
                    continue

                if message_filter.date != comment_raw["updated"]:
                    continue

                comments_by_path = comments_by_revision.setdefault(
                    comment_raw["patch_set"], OrderedDict()
                )
                comments_by_path.setdefault(path, []).append(
                    Comment.from_raw(comment_raw, path=path)
                )

        return comments_by_revision

    def get_diff(self, change, revision, path):
        """Return the Diff of file ``path`` at ``revision`` of ``change``.

        The whole file is requested as context (context=ALL) and whitespace
        changes are not ignored.
        """
        raw = self._json_query(
            "changes/{}/revisions/{}/files/{}/diff?context=ALL&intraline&whitespace=IGNORE_NONE".format(
                self._change_id(change), revision, urllib.parse.quote(path, safe=""),
            )
        )

        return Diff.from_raw(raw)
322
323
def print_comment(comment, revision):
    """Print one comment: a "PS<revision>" header, a blank line, the text.

    The header also names the line when the comment has one.  Lines of the
    comment text are re-wrapped with textwrap.fill, except quotes (starting
    with ">") and code blocks (starting with a space), which are printed
    untouched.
    """
    if comment.line is None:
        header = f"PS{revision}:"
    else:
        header = f"PS{revision}, Line {comment.line}:"

    print(header)
    print()

    for text in comment.message.splitlines():
        keep_verbatim = text.startswith((">", " "))
        print(text if keep_verbatim else textwrap.fill(text))
340
341
def is_interesting_line_c(line):
    """Tell whether ``line`` of C code is worth showing as range-header context.

    Empty lines, lines starting with whitespace and lone braces are not.

    Ideas for further refinement:

      - skip things that look like labels, /^[a-zA-Z0-9_]+:$/
      - skip preprocessor directives
    """
    if not line or line[0].isspace():
        return False

    return line not in ("{", "}")
357
358
def render_diff(diff):
    """Flatten the chunks of ``diff`` into a list of unified-diff-like lines.

    Returns a tuple ``(diff_lines, a_to_diff, b_to_diff)``:

      - ``diff_lines`` holds one dict per rendered line.  "line" is the text
        prefixed with " ", "-" or "+".  "a" / "b" are the 1-based line numbers
        in file A / B, present only when the line exists in that file.
        "line-num-a", "line-num-b" and "context" are what a range header would
        show if a printed range started at this line.
      - ``a_to_diff`` / ``b_to_diff`` map a 1-based line number of file A / B
        to the 0-based index in ``diff_lines``.  Entry 0 is a -1 placeholder,
        because there is no line number 0.
    """
    rendered = []
    a_to_diff = [-1]
    b_to_diff = [-1]

    # Most recent line worth using as context in a range header.
    context = ""

    # (key in the chunk, line prefix, exists in file A, exists in file B),
    # in the order the sections of a chunk are rendered.
    sections = (
        ("ab", " ", True, True),
        ("a", "-", True, False),
        ("b", "+", False, True),
    )

    for chunk in diff.content:
        for key, prefix, in_a, in_b in sections:
            if key not in chunk:
                continue

            for text in chunk[key]:
                # Numbers this line has (or would get) in each file.
                num_a = len(a_to_diff)
                num_b = len(b_to_diff)

                entry = {"line": "{}{}".format(prefix, text)}
                if in_a:
                    entry["a"] = num_a
                if in_b:
                    entry["b"] = num_b
                entry["line-num-a"] = num_a
                entry["line-num-b"] = num_b
                entry["context"] = context
                rendered.append(entry)

                index = len(rendered) - 1
                if in_a:
                    a_to_diff.append(index)
                if in_b:
                    b_to_diff.append(index)

                if is_interesting_line_c(text):
                    context = text

    return rendered, a_to_diff, b_to_diff
433
434
def print_one_diff_line(diff, diff_line, num_width_a, num_width_b):
    """Print one rendered diff line, prefixed with "| ".

    ``diff``, ``num_width_a`` and ``num_width_b`` are only used by the
    optional line-number prefix, which is disabled by default.
    """
    # Flip to True to prefix each line with its line numbers; kept around
    # because it's useful for debugging.
    show_line_numbers = False

    if show_line_numbers:
        if diff.path_a is None:
            # File added: only B line numbers exist.
            prefix = "{b:{num_width_b}} ".format(
                b=diff_line["b"], num_width_b=num_width_b
            )
        elif diff.path_b is None:
            # File removed: only A line numbers exist.
            prefix = "{a:{num_width_a}} ".format(
                a=diff_line["a"], num_width_a=num_width_a
            )
        else:
            # File modified: a line may be missing on either side.
            prefix = "{a:{num_width_a}} {b:{num_width_b}} ".format(
                a=diff_line.get("a", ""),
                b=diff_line.get("b", ""),
                num_width_a=num_width_a,
                num_width_b=num_width_b,
            )

        print(prefix, end="")

    print(f"| {diff_line['line']}")
464
465
def print_comments_matching_diff_line(comments, diff_line, revision):
    """Print every comment of ``comments`` that is attached to ``diff_line``.

    A "PARENT" comment matches on the line number in file A, a "REVISION"
    comment on the line number in file B.  Each printed comment is surrounded
    by blank lines.
    """
    # Which entry of diff_line holds the line number for each comment side.
    number_key_by_side = {"PARENT": "a", "REVISION": "b"}

    for comment in comments:
        number_key = number_key_by_side.get(comment.side)

        if number_key is None or number_key not in diff_line:
            continue

        if diff_line[number_key] != comment.line:
            continue

        print()
        print_comment(comment, revision)
        print()
485
486
def print_range_header(diff_slice):
    """Print a hunk-like "@@ -a,n +b,m @@ context" header for ``diff_slice``.

    The header says where the lines that follow come from in the A and B
    versions of the file.  The start numbers and the context come from the
    first line of the slice; the counts are the number of lines of the slice
    present in each file.  ``diff_slice`` must not be empty.
    """
    first = diff_slice[0]
    start_a = first["line-num-a"]
    start_b = first["line-num-b"]
    context = first["context"]

    count_a = sum(1 for entry in diff_slice if "a" in entry)
    count_b = sum(1 for entry in diff_slice if "b" in entry)

    print(f"| @@ -{start_a},{count_a} +{start_b},{count_b} @@ {context}")
509
510
def _merge_diff_line_ranges(ranges):
    """Merge overlapping or contiguous half-open ``(low, high)`` ranges.

    The input may be in any order; the result is sorted and disjoint.
    """
    merged = []

    for low, high in sorted(ranges):
        if merged and low <= merged[-1][1]:
            # Overlaps (or touches) the previous range: extend it.  max() is
            # needed because this range may lie entirely inside the previous.
            merged[-1] = (merged[-1][0], max(merged[-1][1], high))
        else:
            merged.append((low, high))

    return merged


def print_diff_with_comments(server, diff, comments, revision):
    """Print the parts of ``diff`` that have comments, with those comments.

    Output starts with the "---" / "+++" file header, then file-level
    comments (those without a line).  After that comes each commented region
    of the diff with a few lines of context, each comment printed right after
    the line it is attached to.  Regions are printed in file order and
    separated by " ...".

    ``server`` is unused.  ``comments`` must be a list of comments on the file
    of ``diff``.  ``revision`` is only used to label the comments.
    """
    assert isinstance(comments, list)

    # Number of diff lines shown before and after each commented line/range.
    context_lines = 9

    print("| --- {}".format(diff.path_a if diff.path_a is not None else "/dev/null"))
    print("| +++ {}".format(diff.path_b if diff.path_b is not None else "/dev/null"))

    diff_lines, line_mapping_a_to_diff, line_mapping_b_to_diff = render_diff(diff)

    def comment_to_diff_range_idx(comment):
        # Return the (first, last) indices in diff_lines covered by comment.
        mapping = (
            line_mapping_a_to_diff
            if comment.side == "PARENT"
            else line_mapping_b_to_diff
        )

        if comment.range is not None:
            # Range comment.
            return mapping[comment.range.start_line], mapping[comment.range.end_line]

        # Line comment.
        idx = mapping[comment.line]
        return idx, idx

    # Compute the half-open ranges of diff_lines to print, based on where the
    # comments are.
    wanted_ranges = []

    for comment in comments:
        if comment.line is None:
            # It's a file comment, doesn't matter for ranges.
            continue

        start_idx_in_diff, end_idx_in_diff = comment_to_diff_range_idx(comment)

        low = max(0, start_idx_in_diff - context_lines)

        # Exclusive bound, so it may equal len(diff_lines).  Capping it any
        # lower would drop the last diff line and a comment attached to it.
        high = min(len(diff_lines), end_idx_in_diff + 1 + context_lines)

        wanted_ranges.append((low, high))

    # Comments are not necessarily sorted by position, so sort and merge the
    # ranges: each diff line, and hence each comment, is printed exactly once.
    diff_line_ranges_to_print = _merge_diff_line_ranges(wanted_ranges)

    # First, print any file-level comments.
    for comment in comments:
        if comment.line is None:
            print_comment(comment, revision)
            print()

    # Print all diff ranges we want to print, with comments matching those lines.
    last_range_idx = len(diff_line_ranges_to_print) - 1

    for i, (low, high) in enumerate(diff_line_ranges_to_print):
        diff_slice = diff_lines[low:high]

        print_range_header(diff_slice)

        # Width of the largest a / b line number displayed in this range.
        num_width_a = len(str(max((dl["a"] for dl in diff_slice if "a" in dl), default=0)))
        num_width_b = len(str(max((dl["b"] for dl in diff_slice if "b" in dl), default=0)))

        for diff_line in diff_slice:
            print_one_diff_line(diff, diff_line, num_width_a, num_width_b)
            print_comments_matching_diff_line(comments, diff_line, revision)

        if i != last_range_idx:
            print()
            print(" ...")
            print()
611
612
def read_int():
    """Prompt on stdout with "? " and read an integer from stdin.

    Keeps asking until a line parses as an integer, and returns that integer.

    Raises:
        EOFError: stdin reached end of input.  Without this check readline()
            would keep returning "" and the loop would never end.
    """
    while True:
        print("? ", end="")
        sys.stdout.flush()
        line = sys.stdin.readline()

        if not line:
            # readline() returns "" only at end of input; even an empty
            # answer contains its newline.
            raise EOFError("End of input reached while waiting for a number.")

        answer = line.strip()

        try:
            return int(answer)
        except ValueError:
            print("Can't parse {} as an integer.".format(answer))
623
624
def choose(items, key_func, render_func):
    """Print a menu of ``items`` and return the one the user picks.

    Each item is listed as "[key] text", with the key from ``key_func(item)``
    and the text from ``render_func(item)``; both are called once per item,
    in order.  Keys must be unique, and must be integers to be selectable,
    since the answer is read with read_int().  The user is asked again until
    the answer is one of the keys.

    Raises:
        ValueError: ``items`` is empty.  No answer could ever be valid, so
            prompting would loop forever.
    """
    by_key = {}

    for item in items:
        key = key_func(item)
        text = render_func(item)
        assert key not in by_key
        by_key[key] = item

        print("[{}] {}".format(key, text))

    if not by_key:
        raise ValueError("There is nothing to choose from.")

    while True:
        answer = read_int()
        if answer in by_key:
            return by_key[answer]

        print("Invalid choice.")
642
643
def main():
    """Entry point: print the commented diffs of one review message.

    With two arguments (server base address, change number) the message is
    chosen interactively from a menu.  With four, the two extra arguments
    (author id, comment timestamp) select it.  Any other argument count
    prints the usage and exits with status 1.
    """
    argc = len(sys.argv)

    if argc != 3 and argc != 5:
        usage_lines = (
            "Invalid number of parameters.",
            "",
            "Interactive usage: ./generate.py [server base address] [change number]",
            "Unattended usage:  ./generate.py [server base address] [change number] [author id] [comment timestamp]",
            "",
            "Examples:",
            "  ./generate.py 'https://gnutoolchain-gerrit.osci.io/r' 483",
            "  ./generate.py 'https://gnutoolchain-gerrit.osci.io/r' 483 1000025 \"2019-11-05 23:52:21.000000000\"",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    interactive = argc == 3

    server_address = sys.argv[1]
    change_number = int(sys.argv[2])

    # Paths are appended after a "/", so drop one trailing slash if present.
    if server_address.endswith("/"):
        server_address = server_address[:-1]

    server = Server(server_address)

    change = server.get_change(change_number)
    if change is None:
        raise Exception("Change {} does not exist.".format(change_number))

    messages = sorted(server.get_change_messages(change), key=lambda m: m.date)

    if interactive:
        # Number the menu entries 1, 2, 3... in the order they are listed.
        shown = 0

        def number_message(_message):
            nonlocal shown
            shown += 1
            return shown

        message = choose(
            messages,
            number_message,
            lambda m: "By {} ({}) at '{}'".format(m.author.name, m.author.id, m.date),
        )
    else:
        author_id = int(sys.argv[3])
        timestamp = sys.argv[4]

        message = next(
            (
                candidate
                for candidate in messages
                if candidate.author.id == author_id and candidate.date == timestamp
            ),
            None,
        )

        if message is None:
            raise Exception(
                "Could not find message corresponding to author {} and timestamp {}".format(
                    author_id, timestamp
                )
            )

    # Look for code comments that were posted along this message.
    comments_by_revision = server.get_change_message_comments(change, message)

    # Several revisions of a change can be commented on at the same time, so
    # print one diff (relative to the base) per commented revision and file,
    # with the comments interleaved.
    for revision, comments_by_path in comments_by_revision.items():
        for path, comments_for_path in comments_by_path.items():
            print_diff_with_comments(
                server,
                server.get_diff(change, revision, path),
                comments_for_path,
                revision,
            )
721
722
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()