# Bikarhêner:Balyozxane/skrîpt/py/removedubs.py

#!/usr/bin/env python3
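"""
Remove duplicate category links (``[[Kategorî:...]]``) from wiki pages.

Usage

The pages to work on are selected with the standard pywikibot page
generator options.

The following parameters are supported:

-always           The bot won't ask for confirmation when putting a page.

-showdiff         The bot will show the differences in the console.

-async            Edits will be performed asynchronously.

-ignore:<mode>    Name of a CANCEL mode from kucosmetics (case-insensitive)
                  that is passed to the cosmetic changes toolkit. The
                  default is MATCH.

Use global -simulate option for test purposes. No changes to live wiki
will be done.
"""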
#
# (C) w:ku:User:Balyozxane
#
# Distributed under the terms of the MIT license.
#
import pywikibot
from pywikibot.bot import SingleSiteBot, ConfigParserBot, AutomaticTWSummaryBot
from pywikibot import pagegenerators
import requests
import datetime
import mwparserfromhell
from kucosmetics import CANCEL, CosmeticChangesToolkit
import re

# Not referenced anywhere in the visible part of this file.
VERBOSE = False
# When true, AddTarixBot.treat_page skips the cosmetic-changes pass and saves
# only the duplicate-category removal. Pages are still edited either way.
TESTING = True


class AddTarixBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that strips repeated ``[[Kategorî:...]]`` links from a page.

    Despite its name, the class only removes duplicate category links. The
    first occurrence of each link is kept and every later one is deleted.
    Two links count as duplicates only when everything between
    ``[[Kategorî:`` and ``]]`` (including any sort key) is identical.
    """

    use_redirects = False

    # Defaults for the options read through ``self.opt`` in this class.
    update_options = {
        'async': False,
        'showdiff': False,
        'ignore': CANCEL.MATCH,
    }

    # Matches one category link; group 1 is the text after the namespace
    # prefix, up to the first closing ``]]`` on the same line.
    _CATEGORY_LINK = re.compile(r'\[\[Kategorî:(.*?)\]\]')

    def __init__(self, **kwargs):
        """Initialise the bot and remember the page name used in summaries."""
        super().__init__(**kwargs)
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/removedubs.py"

    def do_cosmetics(self, old_text, kozmetik_cebu):
        """Run the cosmetic-changes toolkit over ``old_text``.

        :param old_text: wikitext to clean up
        :param kozmetik_cebu: summary suffix to return when the toolkit
            does not change the text
        :return: tuple ``(result, suffix)``; ``result`` is whatever
            ``CosmeticChangesToolkit.change`` returned (``False`` is treated
            here as "no usable result"), and ``suffix`` is
            ``"; paqijiyên kozmetîk"`` when the text was changed, otherwise
            the ``kozmetik_cebu`` that was passed in
        """
        cc_toolkit = CosmeticChangesToolkit(self.current_page,
                                            ignore=self.opt.ignore)
        cleaned_new_text = cc_toolkit.change(old_text)

        if cleaned_new_text is not False and cleaned_new_text != old_text:
            kozmetik_cebu = "; paqijiyên kozmetîk"

        return cleaned_new_text, kozmetik_cebu

    @classmethod
    def _strip_duplicate_categories(cls, text):
        """Delete every repeated category link from ``text``.

        :param text: wikitext to scan
        :return: tuple ``(new_text, duplicates)`` where ``new_text`` keeps
            only the first occurrence of each category link and
            ``duplicates`` is the set of category strings that occurred
            more than once
        """
        seen = set()
        duplicates = set()

        def keep_first(match):
            # Only the first link for a given category survives; all later
            # ones are replaced with the empty string, however many exist.
            category = match.group(1)
            if category in seen:
                duplicates.add(category)
                return ''
            seen.add(category)
            return match.group(0)

        return cls._CATEGORY_LINK.sub(keep_first, text), duplicates

    def treat_page(self) -> None:
        """Remove duplicate category links from the current page and save it.

        Pages without duplicates are only reported, not edited. Unless the
        module-level ``TESTING`` flag is set, the cosmetic-changes toolkit
        is run over the result before saving.
        """
        page = self.current_page

        new_text, duplicate_categories = self._strip_duplicate_categories(page.text)
        if not duplicate_categories:
            pywikibot.output("No duplicate categories found on page '{}'.".format(page.title()))
            return

        pywikibot.output("Duplicate categories found on page '{}'. Removing duplicates...".format(page.title()))

        kozmetik_cebu = ""
        if not TESTING:
            cleaned_new_text, kozmetik_cebu = self.do_cosmetics(new_text, kozmetik_cebu)
            # do_cosmetics may hand back False instead of text; in that case
            # keep the de-duplicated text rather than saving a non-string.
            if cleaned_new_text is not False:
                new_text = cleaned_new_text

        # Generate summary
        summary = f'[[{self.bot_name}|Bot]]: Kategoriyên ducarî hat jêbirin'
        summary += f'{kozmetik_cebu}'

        self.put_current(
            new_text,
            summary=summary,
            asynchronous=self.opt['async'],
            show_diff=self.opt['showdiff']
        )


def main(*args: str) -> None:
    """Parse the command line, build the page generator and run the bot.

    :param args: command-line arguments, passed to ``pywikibot.handle_args``
    :raises ValueError: if ``-ignore`` names something that is not an
        attribute of ``CANCEL``
    """
    # Global options are consumed first, then the page-generator options;
    # whatever is left over is interpreted below.
    remaining = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    remaining = factory.handle_args(remaining)

    switches = ('-always', '-async', '-showdiff')
    options = {}

    for raw_arg in remaining:
        option, _, value = raw_arg.partition(':')
        if option in switches:
            # Drop the leading dash to get the option key.
            options[option[1:]] = True
            continue
        if option != '-ignore':
            # Anything else is ignored.
            continue
        value = value.upper()
        try:
            options['ignore'] = getattr(CANCEL, value)
        except AttributeError:
            raise ValueError(f'Unknown ignore mode {value!r}!')

    gen = factory.getCombinedGenerator(preload=True)

    # suggest_help returns a true value when the bot should not be started.
    if pywikibot.bot.suggest_help(missing_generator=not gen):
        return

    AddTarixBot(generator=gen, **options).run()


# Run the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()