# Bikarhêner:Balyozxane/skrîpt/py/citeKurdifier.py

"""
python pwb.py updatewin -f:"citekurdifier.py" -s:"first_upper ji bo ziman"
To-do:


"""
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import SingleSiteBot, ConfigParserBot, AutomaticTWSummaryBot
from kucosmetics import CANCEL, CosmeticChangesToolkit
from pywikibot.tools import first_upper
import json
import re
import mwparserfromhell


class CiteKurdifierBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that converts citation templates on a page to their Kurdish form.

    Each treated page is parsed with mwparserfromhell; templates whose name
    appears in the conversion tables get their name, parameter names and
    selected parameter values rewritten, and the result is saved after a
    cosmetic-changes pass. The conversion tables are read from a JSON wiki
    page when the bot is constructed.
    """
    # pywikibot bot setting; presumably False makes the bot skip redirect
    # pages -- confirm against the pywikibot BaseBot documentation.
    use_redirects = False
    # Script-specific option defaults. They are read in this class as
    # self.opt['async'], self.opt['showdiff'] and self.opt.ignore.
    update_options = {
        'async': False,
        'showdiff': False,
        'ignore': CANCEL.MATCH,
    }

    def __init__(self, **kwargs):
        """Set up the bot and load the conversion tables from the wiki.

        The tables are stored as JSON on a user sub-page. If that page cannot
        be decoded, or does not contain a JSON object, an error is logged and
        every table falls back to an empty dict so that the bot runs without
        touching any template instead of failing on the first page.

        :param kwargs: options forwarded unchanged to the pywikibot bot
            base classes.
        """
        super().__init__(**kwargs)
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/citeKurdifier.py"
        # Fetch JSON content from the specified page
        json_title = "Bikarhêner:Balyozxane/skrîpt/json/citeKurdifier.json"
        json_page = pywikibot.Page(self.site, json_title)
        self.json_content = json_page.text

        # Keep the try body minimal: only the decode step can raise here.
        try:
            conversions = json.loads(self.json_content)
        except json.JSONDecodeError as e:
            pywikibot.error(f"Error decoding JSON content: {e}")
            conversions = {}

        # The lookups below need a mapping; any other JSON root is unusable.
        if not isinstance(conversions, dict):
            pywikibot.error(
                f"Expected a JSON object in {json_title}, "
                f"got {type(conversions).__name__}")
            conversions = {}

        # Always define every table, even after a decode failure, because the
        # other methods read these attributes unconditionally.
        self.parameter_conversions = conversions
        self.cite_templates = conversions.get('templates', {})
        self.basic_params = conversions.get('basic_params', {})
        self.numbered_params = conversions.get('numbered_params', {})
        self.months = conversions.get('month_list', {})
        self.date_params = conversions.get('date_params', {})
        self.url_status = conversions.get('rewşa_urlyê', {})
        self.ziman = conversions.get('ziman', {})

    def do_kozmetik(self, old_text):
        """Apply cosmetic changes to ``old_text`` for the current page.

        :param old_text: wikitext to clean up.
        :return: ``(text, summary_suffix)``. ``text`` is the cleaned wikitext,
            or ``old_text`` itself when the toolkit changed nothing or did
            not produce text. ``summary_suffix`` is an empty string in those
            cases, otherwise a fragment to append to the edit summary.
        """
        cc_toolkit = CosmeticChangesToolkit(self.current_page,
                                            ignore=self.opt.ignore)
        new_text, summaries = cc_toolkit.change(old_text)

        # The toolkit may hand back False instead of text (presumably when it
        # cancels the run -- confirm in kucosmetics). Never pass that on to
        # the caller, which would try to save it as page content.
        if new_text is False or new_text == old_text:
            return old_text, ""

        kozmetik_cebu = "; paqijiyên kozmetîk"
        applied_summaries = ', '.join(summaries.values())
        if applied_summaries:
            kozmetik_cebu += f' ({applied_summaries}.)'

        return new_text, kozmetik_cebu

    def treat_page(self) -> None:
        """Convert the citation templates of the current page and save it.

        Pages that do not exist are skipped, as are pages outside namespace 0
        (one named sandbox page is exempt). The page is only saved when the
        conversion changed more than whitespace.
        """
        current = self.current_page
        if not current.exists():
            pywikibot.error(f"Skipping {current.title()} since it does not exist")
            return

        in_scope = (
            current.namespace() == 0
            or current.title() == 'Bikarhêner:Balyozxane/ceribandin'
        )
        if not in_scope:
            pywikibot.output(f"Skipping page '{current.title()}' because it is not in namespace 0")
            return

        original_text = current.text
        parsed = mwparserfromhell.parse(original_text)

        for tpl in parsed.filter_templates():
            if not self.should_process_template(tpl):
                continue
            # Order matters: parameter names are renamed first, and the
            # value passes then look the parameters up by their new names.
            self.replace_parameters(tpl.params)
            self.replace_date_values(tpl.params)
            self.replace_param_values(tpl.params)
            self.remove_params(tpl)
            self.replace_template_name(tpl)

        updated_text = str(parsed)

        # Compare with all whitespace removed so that spacing-only
        # differences do not trigger an edit.
        if ''.join(original_text.split()) == ''.join(updated_text.split()):
            return

        final_text, cosmetic_note = self.do_kozmetik(updated_text)
        edit_summary = f'[[{self.bot_name}|Bot]]: Kurdîkirina çavkaniyan{cosmetic_note}'

        self.put_current(
            final_text,
            summary=edit_summary,
            asynchronous=self.opt['async'],
            show_diff=self.opt['showdiff'],
        )

    def should_process_template(self, template):
        """Checks if a template should be processed based on self.cite_templates."""
        template_name = template.name
        template_name_lower = template_name.strip().lower()
        for key, value in self.cite_templates.items():
            if (
                    key.lower() == template_name_lower
                    or value.lower() == template_name_lower
            ):
                return True
        return False

    def replace_template_name(self, template):
        template_name = template.name.strip().lower()
        try:
            new_template_name = self.cite_templates[template_name]
            template.name = f'{new_template_name} '
        except KeyError:
            pass

    def remove_params(self, template):

        if template.has("df"):
            template.remove("df")

    def replace_parameters(self, params):
        for param in params:
            param_name = param.name.strip()

            # Basic replacements for parameter names
            if param_name.lower() in self.basic_params:
                new_param_name = self.basic_params[param_name.lower()]
                param.name = new_param_name

            # Numbered arguments replacements using regex
            for pattern, new_param_name in self.numbered_params.items():
                match = re.match(pattern, param_name)
                if match:
                    param.name = re.sub(pattern, new_param_name, param_name)

            # Add empty an space after param value
            param.value = f'{param.value.strip()} '

    def replace_date_values(self, params):
        for param in params:
            param_name = param.name.strip()
            # Date parameter handling
            if param_name.lower() in self.date_params:
                new_value = ""

                # First replacement pattern for existing format: Day Month Year
                match1 = re.search(r"(?P<day>\d+)\s+(?P<month>\w+)\s+(?P<year>\d+)", param.value.strip())
                # Second replacement pattern for new format: Month Day, Year
                match2 = re.search(r"(?P<month>\w+)\s+(?P<day>\d+),\s+(?P<year>\d+)", param.value.strip())

                # If the first replacement pattern matches
                if match1:
                    # Convert month name to Kurdish using month_list
                    kurdish_month = self.months.get(match1.group("month").lower(), match1.group("month"))

                    # Construct new value in Kurdish format
                    new_value = f"{match1.group('day')} {kurdish_month} {match1.group('year')}"

                # If the second replacement pattern matches
                elif match2:
                    # Convert month name to Kurdish using month_list
                    kurdish_month = self.months.get(match2.group("month").lower(), match2.group("month"))

                    # Construct new value in Kurdish format
                    new_value = f"{match2.group('day')} {kurdish_month} {match2.group('year')}"

                # Update the parameter value if a match was found
                if new_value:
                    param.value = new_value.strip() + " "

    def replace_param_values(self, params):
        for param in params:
            param_name = param.name.strip()

            if param_name.lower() == "çap":
                param_val = str(param.value.strip())
                match = re.search(r"^(\d+) ed\.?$", param_val)
                match2 = re.search(r"^(\d+)(th|rd|st|nd)$", param_val)
                if match:
                    new_value = re.sub(r"^(\d+) ed\.?$", r"\1", param_val)
                    param.value = new_value.strip() + " "
                elif match2:
                    new_value = re.sub(r"^(\d+)(th|rd|st|nd)$", r"\1", param_val)
                    param.value = new_value.strip() + " "

            elif param_name.lower() == "rewşa-urlyê":
                param_val = param.value.strip()
                if param_val:
                    try:

                        new_value = self.url_status[param_val]
                    except KeyError:
                        # Handle the missing key case:
                        new_value = param_val  # Keep the original value
                    param.value = new_value.strip() + " "

            elif param_name.lower() == "ziman":
                param_val = param.value.strip()
                if param_val:
                    try:

                        new_value = self.ziman[first_upper(param_val)]
                    except KeyError:
                        # Handle the missing key case:
                        new_value = param_val  # Keep the original value
                    param.value = new_value.strip() + " "

            elif param_name.lower() == "sernav":
                param_val = param.value.strip()
                if param_val.lower() in ["archived copy", "arşivlenmiş kopya"]:
                    param.value = "Kopîkirina arşîvê "


def main(*args: str) -> None:
    """Parse the command line and run the bot.

    Global pywikibot arguments and page-generator arguments are consumed
    first. Of the remaining arguments, ``-always``, ``-async`` and
    ``-showdiff`` switch the option of the same name on, and
    ``-ignore:<mode>`` selects a ``CANCEL`` member by its upper-cased name.
    Any other argument is ignored.

    :param args: command line arguments.
    :raises ValueError: if ``-ignore`` names an unknown ``CANCEL`` mode.
    """
    remaining = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    remaining = factory.handle_args(remaining)

    switches = ('-always', '-async', '-showdiff')
    options = {}

    for raw in remaining:
        name, _, value = raw.partition(':')
        if name in switches:
            # Option key is the switch without its leading dash.
            options[name[1:]] = True
            continue
        if name != '-ignore':
            continue
        value = value.upper()
        try:
            options['ignore'] = getattr(CANCEL, value)
        except AttributeError:
            raise ValueError(f'Unknown ignore mode {value!r}!')

    gen = factory.getCombinedGenerator(preload=True)

    # suggest_help returns a truthy value when it had to print help
    # (here: when no generator was given); in that case do not run.
    if pywikibot.bot.suggest_help(missing_generator=not gen):
        return

    bot = CiteKurdifierBot(generator=gen, **options)
    bot.run()


# Run the bot only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()