From 4fab980459341695a4ccfd28534e2ca46a3c06ad Mon Sep 17 00:00:00 2001 From: Victor Seva <vseva@sipwise.com> Date: Tue, 16 May 2023 15:09:13 +0200 Subject: [PATCH] MT#56231 gerrit: crontab to cleanup Automate the process of cleaning up the Gerrit info we keep. We query the Gerrit server and check whether each review is merged or abandoned. If so, we remove its data from the database and query Jenkins to remove the related PPA. Ideally this should not be needed, but it seems there are some cases/scenarios that we don't manage properly. * will be executed on the 15th day of each month at 7:30 * add timezone info to Gerrit fixtures to reduce warnings Change-Id: Ic5ddc3b646532257bd7747443a6fdead4ff28544 --- gerrit/fixtures/test_gerrit_commands.yaml | 12 ++-- gerrit/management/commands/gerrit.py | 49 ++------------- gerrit/tasks.py | 76 +++++++++++++++++++++++ gerrit/test_commands.py | 4 +- repoapi/celery.py | 10 +++ 5 files changed, 99 insertions(+), 52 deletions(-) create mode 100644 gerrit/tasks.py diff --git a/gerrit/fixtures/test_gerrit_commands.yaml b/gerrit/fixtures/test_gerrit_commands.yaml index 11ae968..032187c 100644 --- a/gerrit/fixtures/test_gerrit_commands.yaml +++ b/gerrit/fixtures/test_gerrit_commands.yaml @@ -1,24 +1,24 @@ - model: repoapi.gerritrepoinfo pk: 47979 fields: - created: '2023-03-03 08:46:17' + created: '2023-03-03 08:46:17+0000' gerrit_change: '67631' - modified: '2023-03-03 09:09:25' + modified: '2023-03-03 09:09:25+0000' param_ppa: gerrit_alessio_56718_bis_10_5_1 projectname: templates - model: repoapi.gerritrepoinfo pk: 47877 fields: - created: '1977-01-01 00:00:00' + created: '1977-01-01 00:00:00+0000' gerrit_change: '67510' - modified: '2022-03-02 16:24:25' + modified: '2022-03-02 16:24:25+0000' param_ppa: gerrit_alessio_56718_bis_11_1 projectname: sems-pbx - model: repoapi.gerritrepoinfo pk: 12 fields: - created: '1977-01-01 00:00:00' + created: '1977-01-01 00:00:00+0000' gerrit_change: '13200' - modified: '2022-03-02 07:57:21' + modified: '2022-03-02 
07:57:21+0000' param_ppa: gerrit_pu_collectd-abolition projectname: unknown diff --git a/gerrit/management/commands/gerrit.py b/gerrit/management/commands/gerrit.py index 2001caf..b751844 100644 --- a/gerrit/management/commands/gerrit.py +++ b/gerrit/management/commands/gerrit.py @@ -14,15 +14,10 @@ # with this program. If not, see <http://www.gnu.org/licenses/>. from datetime import date from datetime import datetime -from datetime import timedelta -from django.conf import settings from django.core.management.base import BaseCommand -from requests.exceptions import HTTPError -from gerrit.utils import get_change_info -from gerrit.utils import get_datetime -from repoapi.models.gri import GerritRepoInfo +from gerrit import tasks class Command(BaseCommand): @@ -45,46 +40,12 @@ class Command(BaseCommand): ) def refresh(self, *args, **options): - qs = GerritRepoInfo.objects.filter(created__date=date(1977, 1, 1)) - for gri in qs.iterator(): - try: - info = get_change_info(gri.gerrit_change) - gri.created = get_datetime(info["created"]) - gri.modified = get_datetime(info["updated"]) - # don't update modified field on save - gri.update_modified = False - if options["dry_run"]: - self.stdout.write( - f"{gri} would be changed to " - f" created:{gri.created}" - f" modified:{gri.modified}" - ) - else: - gri.save() - except HTTPError: - self.stderr.write(f"{gri} not found, remove it from db") - gri.delete() + tasks.refresh(options["dry_run"]) def cleanup(self, *args, **options): - max_date = options["today"] - timedelta(weeks=options["weeks"]) - self.stderr.write(f"max_date:{max_date}") - if settings.DEBUG: - self.stderr.write("debug ON") - manager = GerritRepoInfo.objects - qs = manager.filter(modified__lt=max_date) - for gri in qs.iterator(): - info = get_change_info(gri.gerrit_change) - status = info["status"] - if status in ["MERGED", "ABANDONED"]: - if options["dry_run"]: - self.stdout.write( - f"{gri} {status}, remove from db, [dry-run]" - ) - else: - 
self.stdout.write(f"{gri} {status}, remove from db") - manager.review_removed( - gri.param_ppa, gri.gerrit_change, gri.projectname - ) + tasks.cleanup( + options["weeks"], options["dry_run"], options["today"].isoformat() + ) def handle(self, *args, **options): action = getattr(self, options["action"]) diff --git a/gerrit/tasks.py b/gerrit/tasks.py new file mode 100644 index 0000000..6baf1eb --- /dev/null +++ b/gerrit/tasks.py @@ -0,0 +1,76 @@ +# Copyright (C) 2023 The Sipwise Team - http://sipwise.com +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. 
+from datetime import date +from datetime import datetime +from datetime import timedelta + +import structlog +from celery import shared_task +from django.apps import apps +from requests.exceptions import HTTPError + +from gerrit.utils import get_change_info +from gerrit.utils import get_datetime + +logger = structlog.get_logger(__name__) + + +@shared_task(ignore_result=True) +def cleanup(weeks: int, dry_run=False, today=None): + if today is None: + today = datetime.now() + else: + today = date.fromisoformat(today) + max_date = today - timedelta(weeks=weeks) + logger.debug(f"max_date:{max_date}") + GerritRepoInfo = apps.get_model("repoapi", "GerritRepoInfo") + manager = GerritRepoInfo.objects + qs = manager.filter(modified__lt=max_date) + for gri in qs.iterator(): + info = get_change_info(gri.gerrit_change) + status = info["status"] + if status not in ["MERGED", "ABANDONED"]: + continue + if dry_run: + logger.info(f"{gri} {status}, remove from db, [dry-run]") + else: + logger.info(f"{gri} {status}, remove from db") + manager.review_removed( + gri.param_ppa, gri.gerrit_change, gri.projectname + ) + + +@shared_task(ignore_result=True) +def refresh(dry_run=False): + GerritRepoInfo = apps.get_model("repoapi", "GerritRepoInfo") + qs = GerritRepoInfo.objects.filter(created__date=date(1977, 1, 1)) + for gri in qs.iterator(): + try: + info = get_change_info(gri.gerrit_change) + gri.created = get_datetime(info["created"]) + gri.modified = get_datetime(info["updated"]) + # don't update modified field on save + gri.update_modified = False + if dry_run: + logger.info( + f"{gri} would be changed to " + f" created:{gri.created}" + f" modified:{gri.modified}" + ) + else: + gri.save() + except HTTPError: + logger.error(f"{gri} not found, remove it from db") + gri.delete() diff --git a/gerrit/test_commands.py b/gerrit/test_commands.py index 45a196b..1271823 100644 --- a/gerrit/test_commands.py +++ b/gerrit/test_commands.py @@ -54,7 +54,7 @@ value = json.loads(change_info) class 
refreshTest(TestCase): fixtures = ["test_gerrit_commands"] - @patch("gerrit.management.commands.gerrit.get_change_info") + @patch("gerrit.tasks.get_change_info") def test_refresh(self, gci): gci.return_value = value qs = GerritRepoInfo.objects @@ -69,7 +69,7 @@ class refreshTest(TestCase): self.assertEqual(qs_filter.count(), 3) @patch("repoapi.models.gri.jenkins_remove_ppa") - @patch("gerrit.management.commands.gerrit.get_change_info") + @patch("gerrit.tasks.get_change_info") def test_cleanup(self, gci, jrp): value["status"] = "MERGED" gci.return_value = value diff --git a/repoapi/celery.py b/repoapi/celery.py index 7fca54c..ee0d06c 100644 --- a/repoapi/celery.py +++ b/repoapi/celery.py @@ -15,6 +15,7 @@ import os from celery import Celery +from celery.schedules import crontab from django_structlog.celery.steps import DjangoStructLogInitStep # set the default Django settings module for the 'celery' program. @@ -48,3 +49,12 @@ def process_result(jbi_id: str, path_envVars: str): "release_changed.tasks.process_result", args=[jbi_id, path_envVars], ) + + +@app.on_after_configure.connect +def setup_periodic_tasks(sender, **kwargs): + sender.add_periodic_task( + crontab(hour=7, minute=30, day_of_month=15), + "gerrit.tasks.cleanup", + args=[4], + )