MT#56231 gerrit: crontab to cleanup

Automate the process of cleaning up the Gerrit info we keep.
We query the Gerrit server and check whether each review is merged or
abandoned. If so, we remove its data from the database and ask Jenkins
to remove the related PPA.

Ideally this should not be needed but it seems there are some
cases/scenarios that we don't manage properly.

* will be executed on the 15th day of each month at 7:30
* add timezone info to Gerrit fixtures to reduce warnings

Change-Id: Ic5ddc3b646532257bd7747443a6fdead4ff28544
master
Victor Seva 2 years ago committed by Víctor Seva
parent 3200efe57a
commit 4fab980459

@ -1,24 +1,24 @@
- model: repoapi.gerritrepoinfo - model: repoapi.gerritrepoinfo
pk: 47979 pk: 47979
fields: fields:
created: '2023-03-03 08:46:17' created: '2023-03-03 08:46:17+0000'
gerrit_change: '67631' gerrit_change: '67631'
modified: '2023-03-03 09:09:25' modified: '2023-03-03 09:09:25+0000'
param_ppa: gerrit_alessio_56718_bis_10_5_1 param_ppa: gerrit_alessio_56718_bis_10_5_1
projectname: templates projectname: templates
- model: repoapi.gerritrepoinfo - model: repoapi.gerritrepoinfo
pk: 47877 pk: 47877
fields: fields:
created: '1977-01-01 00:00:00' created: '1977-01-01 00:00:00+0000'
gerrit_change: '67510' gerrit_change: '67510'
modified: '2022-03-02 16:24:25' modified: '2022-03-02 16:24:25+0000'
param_ppa: gerrit_alessio_56718_bis_11_1 param_ppa: gerrit_alessio_56718_bis_11_1
projectname: sems-pbx projectname: sems-pbx
- model: repoapi.gerritrepoinfo - model: repoapi.gerritrepoinfo
pk: 12 pk: 12
fields: fields:
created: '1977-01-01 00:00:00' created: '1977-01-01 00:00:00+0000'
gerrit_change: '13200' gerrit_change: '13200'
modified: '2022-03-02 07:57:21' modified: '2022-03-02 07:57:21+0000'
param_ppa: gerrit_pu_collectd-abolition param_ppa: gerrit_pu_collectd-abolition
projectname: unknown projectname: unknown

@ -14,15 +14,10 @@
# with this program. If not, see <http://www.gnu.org/licenses/>. # with this program. If not, see <http://www.gnu.org/licenses/>.
from datetime import date from datetime import date
from datetime import datetime from datetime import datetime
from datetime import timedelta
from django.conf import settings
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from requests.exceptions import HTTPError
from gerrit.utils import get_change_info from gerrit import tasks
from gerrit.utils import get_datetime
from repoapi.models.gri import GerritRepoInfo
class Command(BaseCommand): class Command(BaseCommand):
@ -45,45 +40,11 @@ class Command(BaseCommand):
) )
def refresh(self, *args, **options): def refresh(self, *args, **options):
qs = GerritRepoInfo.objects.filter(created__date=date(1977, 1, 1)) tasks.refresh(options["dry_run"])
for gri in qs.iterator():
try:
info = get_change_info(gri.gerrit_change)
gri.created = get_datetime(info["created"])
gri.modified = get_datetime(info["updated"])
# don't update modified field on save
gri.update_modified = False
if options["dry_run"]:
self.stdout.write(
f"{gri} would be changed to "
f" created:{gri.created}"
f" modified:{gri.modified}"
)
else:
gri.save()
except HTTPError:
self.stderr.write(f"{gri} not found, remove it from db")
gri.delete()
def cleanup(self, *args, **options): def cleanup(self, *args, **options):
max_date = options["today"] - timedelta(weeks=options["weeks"]) tasks.cleanup(
self.stderr.write(f"max_date:{max_date}") options["weeks"], options["dry_run"], options["today"].isoformat()
if settings.DEBUG:
self.stderr.write("debug ON")
manager = GerritRepoInfo.objects
qs = manager.filter(modified__lt=max_date)
for gri in qs.iterator():
info = get_change_info(gri.gerrit_change)
status = info["status"]
if status in ["MERGED", "ABANDONED"]:
if options["dry_run"]:
self.stdout.write(
f"{gri} {status}, remove from db, [dry-run]"
)
else:
self.stdout.write(f"{gri} {status}, remove from db")
manager.review_removed(
gri.param_ppa, gri.gerrit_change, gri.projectname
) )
def handle(self, *args, **options): def handle(self, *args, **options):

@ -0,0 +1,76 @@
# Copyright (C) 2023 The Sipwise Team - http://sipwise.com
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from datetime import date
from datetime import datetime
from datetime import timedelta
import structlog
from celery import shared_task
from django.apps import apps
from requests.exceptions import HTTPError
from gerrit.utils import get_change_info
from gerrit.utils import get_datetime
logger = structlog.get_logger(__name__)
@shared_task(ignore_result=True)
def cleanup(weeks: int, dry_run=False, today=None):
    """Drop stored Gerrit review info whose change is merged or abandoned.

    Every GerritRepoInfo with ``modified`` older than ``today - weeks`` is
    checked against Gerrit via ``get_change_info``. When the reported status
    is ``MERGED`` or ``ABANDONED`` the entry is passed to
    ``GerritRepoInfo.objects.review_removed``.

    Args:
        weeks: minimum age, in weeks, of the ``modified`` field for an entry
            to be considered.
        dry_run: when true, only log what would be removed.
        today: reference point as an ISO 8601 string (plain date or full
            timestamp). Defaults to the current local time.
    """
    if today is None:
        today = datetime.now()
    else:
        # datetime.fromisoformat() accepts both "YYYY-MM-DD" and full
        # timestamps, so ``today`` is always a datetime from here on instead
        # of a date in one branch and a datetime in the other.
        today = datetime.fromisoformat(today)
    max_date = today - timedelta(weeks=weeks)
    logger.debug(f"max_date:{max_date}")
    # resolve the model lazily through the app registry
    GerritRepoInfo = apps.get_model("repoapi", "GerritRepoInfo")
    manager = GerritRepoInfo.objects
    qs = manager.filter(modified__lt=max_date)
    for gri in qs.iterator():
        try:
            info = get_change_info(gri.gerrit_change)
        except HTTPError:
            # One unreachable/unknown change must not stop the cleanup of
            # all the remaining entries; report it and move on.
            logger.error(f"{gri} status can't be retrieved, skip it")
            continue
        status = info["status"]
        if status not in ["MERGED", "ABANDONED"]:
            continue
        if dry_run:
            logger.info(f"{gri} {status}, remove from db, [dry-run]")
        else:
            logger.info(f"{gri} {status}, remove from db")
            manager.review_removed(
                gri.param_ppa, gri.gerrit_change, gri.projectname
            )
@shared_task(ignore_result=True)
def refresh(dry_run=False):
    """Refresh ``created``/``modified`` of entries created on 1977-01-01.

    For each GerritRepoInfo whose ``created`` date is 1977-01-01
    (presumably a placeholder value; confirm against the data migration),
    the change is looked up in Gerrit and the ``created`` and ``updated``
    values returned are stored on the entry. Entries for which the lookup
    raises ``HTTPError`` are deleted.

    Args:
        dry_run: when true, nothing is saved or deleted; the intended
            actions are only logged.
    """
    GerritRepoInfo = apps.get_model("repoapi", "GerritRepoInfo")
    qs = GerritRepoInfo.objects.filter(created__date=date(1977, 1, 1))
    for gri in qs.iterator():
        try:
            info = get_change_info(gri.gerrit_change)
            gri.created = get_datetime(info["created"])
            gri.modified = get_datetime(info["updated"])
            # don't update modified field on save
            gri.update_modified = False
            if dry_run:
                logger.info(
                    f"{gri} would be changed to "
                    f" created:{gri.created}"
                    f" modified:{gri.modified}"
                )
            else:
                gri.save()
        except HTTPError:
            if dry_run:
                # a dry run must never touch the database
                logger.error(
                    f"{gri} not found, remove it from db, [dry-run]"
                )
            else:
                logger.error(f"{gri} not found, remove it from db")
                gri.delete()

@ -54,7 +54,7 @@ value = json.loads(change_info)
class refreshTest(TestCase): class refreshTest(TestCase):
fixtures = ["test_gerrit_commands"] fixtures = ["test_gerrit_commands"]
@patch("gerrit.management.commands.gerrit.get_change_info") @patch("gerrit.tasks.get_change_info")
def test_refresh(self, gci): def test_refresh(self, gci):
gci.return_value = value gci.return_value = value
qs = GerritRepoInfo.objects qs = GerritRepoInfo.objects
@ -69,7 +69,7 @@ class refreshTest(TestCase):
self.assertEqual(qs_filter.count(), 3) self.assertEqual(qs_filter.count(), 3)
@patch("repoapi.models.gri.jenkins_remove_ppa") @patch("repoapi.models.gri.jenkins_remove_ppa")
@patch("gerrit.management.commands.gerrit.get_change_info") @patch("gerrit.tasks.get_change_info")
def test_cleanup(self, gci, jrp): def test_cleanup(self, gci, jrp):
value["status"] = "MERGED" value["status"] = "MERGED"
gci.return_value = value gci.return_value = value

@ -15,6 +15,7 @@
import os import os
from celery import Celery from celery import Celery
from celery.schedules import crontab
from django_structlog.celery.steps import DjangoStructLogInitStep from django_structlog.celery.steps import DjangoStructLogInitStep
# set the default Django settings module for the 'celery' program. # set the default Django settings module for the 'celery' program.
@ -48,3 +49,12 @@ def process_result(jbi_id: str, path_envVars: str):
"release_changed.tasks.process_result", "release_changed.tasks.process_result",
args=[jbi_id, path_envVars], args=[jbi_id, path_envVars],
) )
@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Schedule the Gerrit cleanup task once the Celery app is configured.

    Registers ``gerrit.tasks.cleanup`` with ``weeks=4`` to run on the 15th
    day of every month at 07:30.

    Args:
        sender: the Celery application that emitted the signal.
        **kwargs: extra signal arguments, unused.
    """
    sender.add_periodic_task(
        crontab(hour=7, minute=30, day_of_month=15),
        # add_periodic_task() needs a signature, not a bare task name;
        # building it by name avoids importing gerrit.tasks at this point.
        sender.signature("gerrit.tasks.cleanup"),
        args=[4],
    )

Loading…
Cancel
Save