|
| 1 | +# Generated by Django 5.0.8 on 2024-09-12 18:22 |
| 2 | + |
| 3 | +import logging |
| 4 | +from django.db import migrations |
| 5 | +from django.db.models import Q |
| 6 | + |
# Module-level logger used for progress and summary messages in this migration.
logger = logging.getLogger(__name__)

# Only apply the process to models that _could_ have tags.
# Each entry is resolved via apps.get_model("dojo", <name>) in
# clean_all_tag_fields, so these must be model names in the "dojo" app.
model_names = [
    "Product",
    "Endpoint",
    "Engagement",
    "Test",
    "Finding",
    "Finding_Template",
    "App_Analysis",
    "Objects_Product",
]
| 20 | + |
| 21 | + |
def clean_tag_value(tag: str) -> str:
    """
    Return a sanitized copy of a single tag name.

    Every character of `tag` is mapped as follows:
    - a comma becomes a hyphen
    - a space becomes an underscore
    - single and double quote characters are dropped
    All other characters are left untouched.
    """
    # One translation pass; a value of None deletes the character.
    substitutions = str.maketrans({",": "-", " ": "_", '"': None, "'": None})
    return tag.translate(substitutions)
| 30 | + |
| 31 | + |
def clean_all_tag_fields(apps, schema_editor):
    """
    Clean the tag names attached to every model listed in `model_names`.

    For each model, only the instances that have at least one tag containing
    a comma, space, single quote, or double quote are fetched. The `tags`
    relation of each such instance is then replaced with the values produced
    by `clean_tag_value`. A summary of every changed tag is logged at the end.

    Only the `tags` field is written by this function.
    NOTE(review): this docstring previously said 'inherited_tags' is updated
    as well, but nothing here touches that field -- confirm whether it is
    expected to follow from updating `tags`.

    `apps` is the app registry passed in by `migrations.RunPython`;
    `schema_editor` is required by that signature but is not used.
    """
    # The filter is the same for every model, so build it once up front.
    violation_filter = (
        Q(tags__name__icontains=",")
        | Q(tags__name__icontains=" ")
        | Q(tags__name__icontains='"')
        | Q(tags__name__icontains="'")
    )
    # Maps model name -> (instances cleaned, {original tag: cleaned tag})
    updated_count = {}
    for model_name in model_names:
        TaggedModel = apps.get_model("dojo", model_name)
        unique_tags_per_model = {}
        count_per_model = 0
        # Only fetch the objects with tags that contain a character in violation
        queryset = (
            TaggedModel.objects.filter(violation_filter)
            .distinct()
            .prefetch_related("tags")
        )
        # Iterate over each instance to clean the tags. The iterator is used here
        # to prevent loading the entire queryset into memory at once. Instead, we
        # will only process 500 objects at a time
        for instance in queryset.iterator(chunk_size=500):
            # Clean each tag here while preserving the original value
            cleaned_tags = {
                tag.name: clean_tag_value(tag.name) for tag in instance.tags.all()
            }
            # Nothing to write (and nothing new to report) for an untagged instance
            if not cleaned_tags:
                continue
            instance.tags.set(list(cleaned_tags.values()), clear=True)
            count_per_model += 1
            # Update the running list of cleaned tags with the changes on this model
            unique_tags_per_model.update(cleaned_tags)
            # Progress message every 100 instances. It sits after the increment
            # so the same count is never reported twice.
            if count_per_model % 100 == 0:
                logger.info(
                    f"{TaggedModel.__name__}.tags: cleaned {count_per_model} instances..."
                )
        # Update the final count of the instances cleaned for the given model
        if count_per_model:
            updated_count[TaggedModel.__name__] = (
                count_per_model,
                unique_tags_per_model,
            )
    # Write a helpful statement about what tags were changed for each model
    # in the list. It looks something like this:
    #
    #   Product: 1 instances cleaned
    #    quoted string with spaces -> quoted_string_with_spaces
    #    quoted with spaces, and also commas! -> quoted_with_spaces-_and_also_commas!
    #    quoted,comma,tag -> quoted-comma-tag
    #   Engagement: 1 instances cleaned
    #    quoted string with spaces -> quoted_string_with_spaces
    #
    # Tags whose cleaned value equals the original are left out of the listing.
    for key, (count, tags) in updated_count.items():
        logger.info(f"{key}: {count} instances cleaned")
        for old, new in tags.items():
            if old != new:
                logger.info(f" {old} -> {new}")
| 104 | + |
| 105 | + |
def cannot_turn_back_time(apps, schema_editor):
    """
    Reverse handler for this migration; intentionally a no-op.

    The forward step does not store the original tag values (it only logs
    them), so there is nothing to restore them from on rollback. Both
    arguments are accepted to satisfy the RunPython signature and ignored.
    """
    return None
| 113 | + |
| 114 | + |
class Migration(migrations.Migration):
    """Data migration that sanitizes existing tag names on the models in `model_names`."""

    dependencies = [
        ("dojo", "0234_alter_system_settings_maximum_password_length_and_more"),
    ]

    operations = [
        # Forward: clean the tags. Reverse: no-op, the old values are not kept.
        migrations.RunPython(
            code=clean_all_tag_fields,
            reverse_code=cannot_turn_back_time,
        ),
    ]
| 123 | + |
0 commit comments