# Bikarhêner:Balyozxane/skrîpt/py/kuCosmeticsCore.py

#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"kucosmetics.py" -s:"fix assignNamesToUnnamedRefs"

this is a fork of master cosmetic_changes.py which comes with pywikibot. The script can run standalone using [[Bikarhêner:Balyozxane/skrîpt/py/kuCosmeticsRun.py]] which doesn't edit the page if only whitespace changes are detected.

Added:

fixVrefNames --> renames visual editor ref names.

replaceDeprecatedTemplates --> changes redirected templates to target template using a json file ([[Bikarhêner:Balyozxane/skrîpt/py/listeyasablonan.py]]).

fixOthers --> A few standard changes for ku.wiki

replaceDeprecatedParams --> Uses WP:AutoWikiBrowser/Rename template parameters to replace deprecated/English parameters

removeDupeCats --> Removes duplicate categories

fixAgahidankSpace --> standardizes the number of space characters between Agahîdank templates

removeSelfCat --> removes category from self-categorized cats

fixPunctAfterTempl --> Niqeteşaniya piştî şablonên wekî 'çavkanî hewce ye' dixe berî şablonê

fixMainCat --> add main cat from wikidata or create same as page title
"""
import re
import json
import string
import mytools
import requests
import pywikibot
import mwparserfromhell
from enum import IntEnum
from mytools import ucfirst
from pywikibot import textlib
from typing import Any, Union, Tuple
from urllib.parse import urlparse, urlunparse
from pywikibot.backports import Callable, Dict, List, Match, Pattern
from pywikibot.exceptions import InvalidTitleError
from pywikibot.textlib import FILE_LINK_REGEX
from pywikibot.tools import first_lower, first_upper
from pywikibot.tools.chars import url2string

try:
    import stdnum.isbn as stdnum_isbn
except ImportError:
    stdnum_isbn = None

# Documentation-subpage templates per site code: a list of template names
# paired with the subpage suffix. The template names must be lower case,
# whereas the subpage name itself is case sensitive.
moved_links = dict(
    ku=(['documentation', 'belgekirin'], '/belge'),
)

# Module-wide switches.
# VERBOSE is overwritten with ``show_diff`` whenever a
# CosmeticChangesToolkit is created; TESTING lifts the namespace guards
# of the clean-up methods and suppresses saving of newly created pages.
VERBOSE, TESTING = False, False


class CANCEL(IntEnum):
    """Cancel level deciding how far an error may be ignored.

    When an error occurs, the level selects what is skipped: the whole
    page, only the failing method, or a single match. ``ALL`` ignores
    nothing, i.e. the exception is raised.

    In the part of the file visible here, ``PAGE`` is honoured by
    ``CosmeticChangesToolkit.change`` and ``METHOD`` by
    ``CosmeticChangesToolkit.safe_execute``.

    .. versionadded:: 6.3
    """

    ALL = 0  # never swallow: every exception is raised
    PAGE = 1  # skip the whole page when a method fails
    METHOD = 2  # skip only the failing method
    MATCH = 3  # skip only a single match


def _format_isbn_match(match: Match[str], strict: bool = True) -> str:
    """Validate one matched ISBN and return it in its formatted form.

    :param match: regex match exposing the raw ISBN in its ``code`` group
    :param strict: if true, a validation error is re-raised; otherwise it
        is logged and the ISBN is returned exactly as it was matched
    :raises NotImplementedError: the optional ``stdnum`` package is missing
    """
    if not stdnum_isbn:
        raise NotImplementedError(
            'ISBN functionality not available. Install stdnum package.')

    code = match['code']
    try:
        stdnum_isbn.validate(code)
    except stdnum_isbn.ValidationError as err:
        if not strict:
            # Lenient mode: keep the invalid number untouched
            pywikibot.log(f'ISBN "{code}" validation error: {err}')
            return code
        raise
    else:
        return stdnum_isbn.format(code)


def _reformat_ISBNs(text: str, strict: bool = True) -> str:
    """Normalise every ISBN found in *text*.

    Each match found by ``textlib.reformat_ISBNs`` is passed through
    ``_format_isbn_match`` with the given *strict* setting.

    :raises Exception: Invalid ISBN encountered when strict enabled
    """
    def _formatter(match):
        return _format_isbn_match(match, strict=strict)

    return textlib.reformat_ISBNs(text, _formatter)


def do_kozmetik(page, text, ignore=CANCEL.MATCH, show_diff=False):
    """Run all cosmetic changes on *text* and build an edit-summary suffix.

    :param page: the page the text belongs to
    :param text: wikitext to clean up
    :param ignore: one of the CANCEL levels, forwarded to the toolkit
    :param show_diff: forwarded to the toolkit
    :return: tuple ``(new_text, summary_suffix)``. When nothing changed,
        or the page was skipped because of an error, the original *text*
        and an empty suffix are returned.
    """
    cc_toolkit = CosmeticChangesToolkit(page,
                                        show_diff=show_diff,
                                        ignore=ignore)
    new_text, summaries = cc_toolkit.change(text)

    # change() reports a skipped page as an empty string (older variants
    # used False). Treating that as a real result would hand the caller
    # blank text together with a non-empty summary, so fall back to the
    # untouched input instead.
    if not new_text or new_text == text:
        return text, ""

    kozmetik_cebu = "; paqijiyên kozmetîk"
    applied_summaries = ', '.join(summaries.values())
    if applied_summaries:
        kozmetik_cebu += f' ({applied_summaries}.)'

    return new_text, kozmetik_cebu


class CosmeticChangesToolkit:
    """Cosmetic changes toolkit.

    .. versionchanged:: 7.0
       `from_page()` method was removed
    """

    def __init__(self, page: 'pywikibot.page.BasePage', *,
                 show_diff: bool = False,
                 ignore: IntEnum = CANCEL.ALL) -> None:

        """Initializer.

        Stores page metadata, gathers the page state needed by the
        clean-up methods and builds the ordered list of methods to run.

        .. versionchanged:: 5.2
           instantiate the CosmeticChangesToolkit from a page object;
           only allow keyword arguments except for page parameter;
           `namespace` and `pageTitle` parameters are deprecated

        .. versionchanged:: 7.0
           `namespace` and `pageTitle` parameters were removed

        :param page: the Page object containing the text to be modified
        :param show_diff: show difference after replacements
        :param ignore: ignores if an error occurred and either skips the page
            or only that method. It can be set one of the CANCEL constants
        :raises ValueError: the page does not belong to ``wikipedia:ku``
        """
        # The module-level VERBOSE flag mirrors show_diff, which switches
        # on the diagnostic prints in the helper methods.
        global VERBOSE
        VERBOSE = show_diff

        if page.site.sitename != 'wikipedia:ku':
            raise ValueError("This script should only be used on ku:wikipedia")

        self.site = page.site
        self.current_page = page
        self.title = page.title()
        self.namespace = page.namespace()

        self.show_diff = show_diff
        self.template = (self.namespace == 10)
        # Non-negative odd namespace numbers are treated as talk pages
        self.talkpage = self.namespace >= 0 and self.namespace % 2 == 1
        self.ignore = ignore
        # method name -> summary fragment, filled by _check_modification
        self.summaries = {}

        # NOTE(review): presumably the current month and year as a string
        # for dated maintenance tags; confirm against mytools.
        self.tarix = mytools.get_cur_month_year()
        self.is_bekategori = mytools.TagHelpers.is_bekategori(page)

        # The attributes below exist only for main-namespace pages (or in
        # TESTING mode). Methods in this file that read them, such as
        # fixSectionTitles and fixMainCat, return early for other
        # namespaces under the same condition.
        if self.namespace == 0 or TESTING:
            self.unhidden_cats = mytools.get_unhidden_categories('ku', self.title)

            self.is_disambig = page.isDisambig()
            self.gotara_zaravayan = mytools.zaravayen_din(page.categories())
            self.sernav_templates = mytools.get_cat_members(self.site, "Şablonên ji bo sererastkirina sernavê rûpelê",
                                                            10)
            # Two further templates are added to the list by hand
            append_sernav = ['Danasîna kurt', 'Bikaranîna dîroka rms']
            self.sernav_templates.extend(append_sernav)
            self.contains_sewi_cat = mytools.is_category_in_page(page, 'Hemû gotarên sêwî')

            self.contains_sitil_cat = mytools.is_category_in_page(page, 'Hemû şitil')
            self.is_sewi = mytools.TagHelpers.is_sewi(page)
            self.is_sitil = mytools.TagHelpers.is_sitil(page)

            self.is_liste = mytools.is_liste(self.site, self.unhidden_cats)

        # Clean-up methods; _change() runs them in exactly this order,
        # each one receiving the output of the previous one.
        self.common_methods = [
            self.removeLtrMark,
            self.replaceDeprecatedTemplates,

            self.addOrphanTag,
            self.removeOrphanTag,
            self.addUncatTag,
            self.removeUncatTag,
            self.fixLead,
            self.addStubTag,
            self.removeStubTag,

            self.fixApostSign,
            self.fixNowiki,
            self.fixSelfInterwiki,
            self.fixMainCat,
            self.standardizePageFooter,
            self.fixSyntaxSave,
            self.cleanUpLinks,
            self.cleanUpSectionHeaders,
            self.putSpacesInLists,
            self.translateAndCapitalizeNamespaces,
            self.translateMagicWords,
            self.resolveHtmlEntities,
            self.removeNonBreakingSpaceBeforePercent,

            self.fixHtml,
            self.fixReferences,
            self.assignNamesToUnnamedRefs,
            self.fixVrefNames,
            self.fixStyle,
            self.fixTypo,
            self.fixSectionTitles,
            self.replaceDeprecatedParams,
            self.removeDupeCats,
            self.removeDupeParam,
            self.fixAgahidankSpace,
            self.removeSelfCat,
            self.fixPunctAfterTempl,
            self.removeUselessSpaces
        ]
        # ISBN clean-up is only scheduled when the optional stdnum
        # package could be imported.
        if stdnum_isbn:
            self.common_methods.append(self.fix_ISBN)

    # Edit-summary fragment for each clean-up method, keyed by the
    # method's name. _check_modification() looks a method up here when it
    # changed the text and falls back to a generic fragment for names
    # that are missing from this table.
    method_explanations = {
        'removeLtrMark': '--U+200E',
        'addOrphanTag': '+{{Sêwî}}',
        'removeOrphanTag': '--{{Sêwî}}',
        'fixLead': 'Destpêkê standard kir',
        'addStubTag': '+{{Şitil}}',
        'removeStubTag': '--{{Şitil}}',
        'addUncatTag': '+{{Bêkategorî}}',
        'removeUncatTag': '--{{Bêkategorî}}',

        'assignNamesToUnnamedRefs': 'Nav li ref-ê zêde kir',

        'fixApostSign': 'Apostrof rast kir',
        'fixNowiki': '--<nowiki/>',
        'fixSelfInterwiki': '--înterwîkî',
        'fix_ISBN': 'ISBN sererast kir',
        'fixMainCat': '+Kategoriya sereke',
        'standardizePageFooter': 'Binê standard kir',
        'fixSyntaxSave': 'Xeletiyên sentaksê rast kir',
        'cleanUpLinks': 'Lînk paqij kir',
        'cleanUpSectionHeaders': 'Valahiya beşan rast kir',
        'putSpacesInLists': '+Valahiya lîsteyan',
        'translateAndCapitalizeNamespaces': 'Valahiya nav rast kir',
        'translateMagicWords': 'Kelîmeyên sihirî rast kir',
        'replaceDeprecatedTemplates': 'Şablonên beralîkirî guhart',
        'resolveHtmlEntities': 'HTML rast kir',
        'removeUselessSpaces': '--Valahiyên nehewce',
        'removeNonBreakingSpaceBeforePercent': '--Valahiya berî sedî',
        'fixHtml': 'Xeletiyên HTMLê rast kir',
        'fixReferences': 'Ref rast kir',
        'fixVrefNames': 'Navên ref-an rast kir',
        'fixStyle': 'Stîl rast kir',
        'fixTypo': 'Yekeyan rast kir',
        'fixSectionTitles': 'Sernavên beşan rast kir',
        'replaceDeprecatedParams': 'Parametreyên kevn rast kir',
        'removeDupeCats': '--Kategoriya ducarî',
        'fixAgahidankSpace': 'Valahiya agahîdankê standard kir',
        'removeSelfCat': '--Kategoriya li ser xwe',
        'removeDupeParam': '--Parametreya ducarî',
        'fixPunctAfterTempl': 'Niqteşanî piştî şablonê rast kir'
    }

    def safe_execute(self, method: Callable[[str], str], text: str) -> str:
        """Execute the method and catch exceptions if enabled."""
        result = None
        try:
            result = method(text)
        except Exception as e:
            if self.ignore == CANCEL.METHOD:
                pywikibot.warning('Unable to perform "{}" on "{}"!'
                                  .format(method.__name__, self.title))
                pywikibot.error(e)
            else:
                raise
        return text if result is None else result

    def _check_modification(self, method_name: str, old_text: str, new_text: str) -> None:
        """Check if the text is modified by a method and generate a summary."""
        if old_text != new_text:
            summary = self.method_explanations.get(method_name, 'sererastkirinên din')
            self.summaries[method_name] = summary

    def _change(self, text: str) -> str:
        """Execute all clean up methods."""
        modified_text = text
        for method in self.common_methods:
            old_text = modified_text
            modified_text = self.safe_execute(method, modified_text)
            self._check_modification(method.__name__, old_text, modified_text)
        return modified_text

    def change(self, text: str) -> Tuple[str, Dict[Any, Any]]:
        """Execute all clean up methods and catch errors if activated."""
        try:
            new_text = self._change(text)
        except Exception as e:
            if self.ignore == CANCEL.PAGE:
                pywikibot.warning('Skipped "{}", because an error occurred.'
                                  .format(self.title))
                pywikibot.error(e)
                return "", {}  # Return empty string and empty dictionary
            raise
        else:
            # if self.show_diff:
            # pywikibot.showDiff(text, new_text)
            return new_text, self.summaries

    def get_main_cat(self, title: str, timeout: float = 30) -> Union[dict, None]:
        """Look up the main category (Wikidata P910) of a kuwiki page.

        The Wikidata item linked to the kuwiki page *title* is fetched and
        the target of its first P910 claim is resolved to sitelinks.

        :param title: title of the page on kuwiki
        :param timeout: seconds to wait for the Wikidata API before giving
            up; without it a stalled connection would block forever
        :return: ``{"kuwiki": <category title>}`` if the main category has
            a kuwiki sitelink; otherwise ``{"enwiki": <category title>}``
            if it has an enwiki sitelink whose name, without the
            ``Category:`` prefix, equals the page's own enwiki title;
            ``None`` in every other case, including request failures and
            items that have no enwiki sitelink at all.
        """
        wikidata_api_url = 'https://www.wikidata.org/w/api.php'
        params = {
            'action': 'wbgetentities',
            'sites': 'kuwiki',
            'titles': title,
            'props': 'claims|sitelinks',
            'format': 'json'
        }

        try:
            response = requests.get(wikidata_api_url, params=params,
                                    timeout=timeout)
            response.raise_for_status()  # Raise an exception for bad responses
            # Decoding belongs inside the try: a malformed body must be
            # handled like any other failed request.
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error fetching data from Wikidata: {e}")
            return None

        entities = data.get('entities')
        if not entities:
            return None

        # Only one title was requested, so the first entity is the item
        item_data = entities[next(iter(entities))]

        P910_claims = item_data.get('claims', {}).get('P910', [])
        if not P910_claims:
            return None

        # Item id of the category the first P910 claim points to
        mainsnak = P910_claims[0].get('mainsnak', {})
        target_id = mainsnak.get('datavalue', {}).get('value', {}).get('id')

        enwiki_page_dict = item_data.get('sitelinks', {}).get('enwiki')
        if enwiki_page_dict is None:
            return None
        enwiki_page = enwiki_page_dict.get('title')

        if not (target_id and enwiki_page):
            return None

        if VERBOSE:
            print(f"QID  main_Cat: {target_id}")
            print(f"enwiki_page for current page: {enwiki_page}")

        retr_links = mytools.get_sitelinks_qid(target_id, ['ku', 'en'])

        kuwiki_main = retr_links.get('kuwiki')
        if kuwiki_main:
            return {"kuwiki": kuwiki_main}

        enwiki_main = retr_links.get('enwiki')
        if enwiki_main and enwiki_main.replace('Category:', '') == enwiki_page:
            return {"enwiki": enwiki_main}

        return None

    def create_main(self, page, enwiki_page):
        """Create the category named after *page* unless it already exists.

        The new category page receives two maintenance templates and an
        interwiki link to *enwiki_page*. In TESTING mode the page is
        prepared but not saved.

        :param page: page whose title becomes the category name
        :param enwiki_page: title of the matching English page
        :return: a Category object with sort key ``' '`` for the new
            category, or ``None`` if the category page already exists
        """
        cat_title = 'Kategorî:' + page.title()
        target = pywikibot.Page(self.site, cat_title)

        if target.exists():
            if VERBOSE:
                print('Kategorî jixwe heye. Dev jê berde.')
            return None

        target.text = ''.join([
            '{{subst:bêkategorî}}\n{{standard-kat}}',
            f'\n\n[[en:{enwiki_page}]]',
        ])

        if not TESTING:
            target.save(
                summary=f'[[User:Balyozxane/skrîpt/py/kuCosmeticsCore.py|Bot]]: Wekheva [[en:{enwiki_page}]] hat çêkirin')

        return pywikibot.Category(self.site, cat_title, sort_key=' ')

    def fixSectionTitles(self, text: str) -> str:
        if self.namespace != 0 and not TESTING:
            return text

        if self.is_disambig or self.gotara_zaravayan:
            return text

        replacements = {
            r'==\s*[gG]ir[eê]dan[aeêîi]n?\s+[Dd]erv(a|eyî|ê|e)\s*==': '== Girêdanên derve ==',
            r'==\s*Erdn[îi]garî?\s*==': '== Erdnîgarî ==',
            r'==\s*[Çç]ava?kanî\s*==': '== Çavkanî ==',
            r'==\s*[Tt]ûrîzm\s*==': '== Turîzm ==',
            r'==\s*[êeÊe]t[iî]m[ao]lo[gj]î\s*==': '== Etîmolojî ==',
            r'==\s*[Dd][iî]rok\s*==': '== Dîrok ==',
            r'==\s*[bB]in[eê]r[eê] [Jj]î\s*==': '== Binêre herwiha =='

        }

        for pattern, replacement in replacements.items():
            text = re.sub(pattern, replacement, text)

        return text

    def fixNowiki(self, text: str) -> str:
        if self.namespace != 0 and not TESTING:
            return text

        text = re.sub('\[\[([^]]+)]]<nowiki/>', '[[\\1]]', text)

        return text

    def fixPunctAfterTempl(self, text: str) -> str:
        """Move punctuation that follows certain inline templates before them.

        For each listed template, ``{{Template|...}}`` immediately followed
        by a punctuation mark is rewritten so that the mark comes first;
        whitespace in front of the template is dropped in the process.
        Tables are excluded from the replacement. Only main-namespace
        pages are processed (any namespace in TESTING mode).

        :param text: The input wiki text.
        :return: The modified wiki text.
        """
        if self.namespace != 0 and not TESTING:
            return text

        # Characters collected into a single regex character class
        punct_class = "".join([",", ".", ":", ";", "!", "?"])

        for name in ('Çavkanî hewce ye', 'Ne kurdî-biçûk', 'Zelalkirin hewce ye'):
            # group 1: optional rest of the template call (parameters),
            # group 2: the punctuation mark right after the closing braces
            pattern = r'\s*{{\s*' + name + r'([^}]+)?}}([' + punct_class + r'])'
            replacement = '\\2{{' + name + '\\1}}'
            text = textlib.replaceExcept(text, pattern, replacement, ['table'])

        return text

    def removeSelfCat(self, text: str) -> str:
        """Remove a category page's link to itself.

        Only category pages (namespace 14) are processed (any namespace in
        TESTING mode). Sort keys of the remaining categories are kept.

        :param text: wikitext of the category page
        :return: wikitext without the self-referencing category, or the
            unchanged text if the page does not categorise itself
        """
        if self.namespace != 14 and not TESTING:
            return text

        category_links = textlib.getCategoryLinks(text, site=self.site)

        # Build the remaining category links, preserving sort keys
        new_category_links = []
        for category in category_links:
            if category.title() == self.title:
                continue
            sortkey = category.sortKey
            if sortkey:
                new_category_links.append(f"{category.title()}|{sortkey}")
            else:
                new_category_links.append(category.title())

        # Nothing to remove: do not rewrite the category block, otherwise
        # mere normalisation would be reported as a removed self-category.
        if len(new_category_links) == len(category_links):
            return text

        return textlib.replaceCategoryLinks(text, new_category_links,
                                            site=self.site)

    def removeDupeCats(self, text: str) -> str:
        """Remove category links that occur more than once.

        Of several links to the same category the first one is kept,
        unless it has no sort key and a later one has; in that case the
        later link replaces it (and moves to the end of the list).

        :param text: wikitext of the page
        :return: wikitext with duplicate categories removed, or the
            unchanged text if there are no duplicates
        """
        categories = textlib.getCategoryLinks(text, self.site)

        seen_categories = {}  # category title -> sort key of the kept link
        final_categories = []

        for category in categories:
            cat_title = category.title()
            cat_sortkey = category.sortKey

            if cat_title not in seen_categories:
                # First occurrence is always kept
                seen_categories[cat_title] = cat_sortkey
                final_categories.append(category)
                continue

            # Duplicate: it only survives when it supplies a sort key that
            # the kept link lacks.
            if not seen_categories[cat_title] and cat_sortkey:
                final_categories = [cat for cat in final_categories
                                    if cat.title() != cat_title]
                final_categories.append(category)
                seen_categories[cat_title] = cat_sortkey

        # Every duplicate shrinks the list; equal length means there was
        # none, so the text is left alone rather than being renormalised
        # and reported as a removed duplicate.
        if len(final_categories) == len(categories):
            return text

        return textlib.replaceCategoryLinks(text, final_categories,
                                            site=self.site)

    def removeDupeParam(self, text: str) -> str:
        """Drop repeated template parameters whose value is empty.

        Within each template, a parameter whose name has already been
        seen is removed if its value is blank. Repeated parameters that
        carry a value are kept.

        :param text: wikitext of the page
        :return: wikitext without empty duplicate parameters, or the
            unchanged text if nothing was removed
        """
        wikicode = mwparserfromhell.parse(text)
        removed_any = False

        for template in wikicode.filter_templates():
            params_seen = set()
            # Iterate over a copy: removing from template.params while
            # looping over it would skip the parameter after each removal.
            for param in list(template.params):
                param_name = str(param.name).strip()
                if param_name in params_seen and not param.value.strip():
                    template.remove(param)
                    removed_any = True
                else:
                    params_seen.add(param_name)

        return str(wikicode) if removed_any else text

    def replaceDeprecatedParams(self, text: str) -> str:
        """Rename template parameters according to ``parambikejson.json``.

        The JSON file, read from the current working directory, maps a
        template name to a mapping of old parameter name to new parameter
        name. Template names are looked up after ``ucfirst``.

        :param text: wikitext of the page
        :return: wikitext with the listed parameters renamed
        """
        with open('parambikejson.json', encoding='utf-8') as f:
            alias_dict = json.load(f)

        wikicode = mwparserfromhell.parse(text)

        for template in wikicode.filter_templates():
            lookup_name = ucfirst(template.name)
            if lookup_name not in alias_dict:
                continue

            renames = alias_dict[lookup_name]
            for param in template.params:
                current = param.name.strip()
                if current in renames:
                    param.name = renames[current]

        return str(wikicode)

    def fixAgahidankSpace(self, text: str) -> str:
        """Lay out the parameters of "Agahîdank" templates uniformly.

        For every template whose name starts with "Agahîdank" and that has
        parameters, each parameter name gets one leading space and is
        padded to a width of 18 characters plus one trailing space; each
        value is stripped, preceded by one space and followed by a line
        break. The template name is followed by a line break as well.
        Only main-namespace pages are processed (any namespace in TESTING
        mode).

        :param text: wikitext of the page
        :return: reformatted wikitext
        """
        if self.namespace != 0 and not TESTING:
            return text

        wikicode = mwparserfromhell.parse(text)

        for template in wikicode.filter_templates():
            if not ucfirst(template.name).startswith("Agahîdank"):
                continue
            if not template.params:
                # Nothing to align in a parameterless template
                continue

            for param in template.params:
                bare_name = param.name.strip()
                # One parameter per line
                param.value = " " + param.value.strip() + "\n"
                # Short names are padded to 18 columns; longer names just
                # get the single separating space.
                param.name = " " + bare_name.ljust(18) + " "

            # Put the first parameter on its own line
            template.name = template.name.strip() + "\n"

        return str(wikicode)

    def removeLtrMark(self, text: str) -> str:
        """
        Removes all occurrences of the Left-to-Right Mark (U+200E) from the input string.

        :param text: The string to process.
        :return: A new string with all LTR marks removed.
        """
        return text.replace('\u200e', '')

    def replaceDeprecatedTemplates(self, text: str) -> str:
        """Replace redirected template names with their target names.

        The mapping is read from ``redirected_template_mappings.json`` in
        the current working directory; each entry maps a template name to
        a dict whose ``rd_title`` is the redirect target. Whitespace
        around the template name is preserved.

        :param text: wikitext of the page
        :return: wikitext with redirected template names replaced, or the
            unchanged text if no template was renamed
        """
        with open('redirected_template_mappings.json', encoding='utf-8') as f:
            template_mappings = json.load(f)

        wikicode = mwparserfromhell.parse(text)
        renamed_any = False

        for template in wikicode.filter_templates():
            old_name = ucfirst(template.name)
            if old_name not in template_mappings:
                continue

            new_name = template_mappings[old_name]["rd_title"]
            raw_name = str(template.name)
            # Locate the name inside the raw text, ignoring letter case
            start_index = raw_name.lower().find(old_name.lower())
            if start_index == -1:
                # The normalised name does not occur verbatim; slicing with
                # -1 would garble the template name, so leave it alone.
                continue

            template.name = (
                raw_name[:start_index] + new_name
                + raw_name[start_index + len(old_name):]
            )
            renamed_any = True

        return str(wikicode) if renamed_any else text

    def assignNamesToUnnamedRefs(self, text: str) -> str:
        """Merge unnamed ``<ref>`` tags that repeat the same content.

        * An unnamed ref whose content equals that of a named ref is
          replaced by a self-closing ``<ref name="..."/>`` reusing the name.
        * Among unnamed refs sharing the same content, the first gets a
          fresh name of the form ``:0``, ``:1``, ... and the others are
          replaced by self-closing refs pointing to it.
        * Unique unnamed refs and refs without content are left untouched.

        NOTE(review): ``group`` and other attributes are not taken into
        account, so refs from different groups with identical content are
        merged too - confirm that this is acceptable.

        :param text: The input wikitext containing <ref> tags.
        :return: The modified wikitext with named <ref> tags.
        """
        parsed = mwparserfromhell.parse(text)
        tags = [tag for tag in parsed.filter_tags(recursive=True)
                if tag.tag == "ref"]

        def content_of(tag) -> str:
            # Self-closing refs (<ref name="x" />) carry no content; they
            # must neither be counted nor be used as a lookup key.
            if tag.self_closing or tag.contents is None:
                return ""
            return str(tag.contents).strip()

        def reuse(tag, name) -> None:
            # Swap a full ref for a self-closing one pointing to *name*
            new_tag = mwparserfromhell.nodes.tag.Tag("ref", self_closing=True)
            new_tag.add("name", name)
            parsed.replace(tag, new_tag)

        ref_contents_to_names = {}  # content -> name to reuse
        existing_names = set()      # every name already taken
        ref_content_counts = {}     # content -> number of unnamed refs

        for tag in tags:
            ref_content = content_of(tag)
            if tag.has("name"):
                name = tag.get("name").value.strip()
                existing_names.add(name)
                if ref_content:
                    ref_contents_to_names[ref_content] = name
            elif ref_content:
                ref_content_counts[ref_content] = (
                    ref_content_counts.get(ref_content, 0) + 1)

        counter = 0
        for tag in tags:
            if tag.has("name"):
                continue
            ref_content = content_of(tag)
            if not ref_content:
                continue

            if ref_content in ref_contents_to_names:
                # Either a named ref had this content from the start, or
                # an earlier duplicate was named during this loop.
                reuse(tag, ref_contents_to_names[ref_content])
            elif ref_content_counts[ref_content] > 1:
                # First of several identical unnamed refs: take the next
                # free ":<n>" name.
                while f":{counter}" in existing_names:
                    counter += 1
                new_name = f":{counter}"
                ref_contents_to_names[ref_content] = new_name
                existing_names.add(new_name)
                tag.add("name", new_name)

        return str(parsed)

    def fixVrefNames(self, text: str) -> str:
        """Give VisualEditor-style ref names (``:0``, ``:1``, ...) real names.

        Taken from [[:en:User:Qwerfjkl/VEref.py]] which is itself taken
        from [[:en:User:Psiĥedelisto/VisualEditor ref namer.py]].

        A numbered ref is renamed to ``<last name><year>`` when its
        content starts with a template that has an author parameter
        (``vauthors``, ``authors``, ``paşnav`` or ``pêşnav``) containing
        Latin letters and a date parameter (``tarîx``, ``dîrok`` or
        ``sal``) containing a four-digit year. Names that are already in
        use are never assigned. Only main-namespace pages are processed
        (any namespace in TESTING mode).

        :param text: wikitext of the page
        :return: wikitext with the renamed references, or the unchanged
            text if no ref could be renamed
        """
        if self.namespace != 0 and not TESTING:
            return text

        parsed = mwparserfromhell.parse(text)

        # Only genuine <ref> tags that carry a name. Matching the tag name
        # keeps <references> and tags that merely contain a ref out.
        tags = [tag for tag in parsed.filter_tags(recursive=True)
                if str(tag.tag).strip().lower() == "ref" and tag.has("name")]

        # Existing names; a new name must not collide with any of them
        ref_names = {str(tag.get("name").value) for tag in tags}

        numbered_name = re.compile(r"^:\d+$")
        punctuation = set(string.punctuation)
        latin_alphabet = set(string.ascii_letters)
        pretty = {}  # old numbered name -> new name

        for ref in tags:
            old_name = str(ref.get("name").value)
            # Only the defining (non self-closing) numbered refs count
            if not numbered_name.search(old_name) or str(ref).endswith("/>"):
                continue
            # An empty ref has no first node to inspect
            if ref.contents is None or not ref.contents.nodes:
                continue

            template = ref.contents.get(0)
            if not isinstance(template, mwparserfromhell.nodes.Template):
                continue

            # First author-like parameter present, in order of preference
            for key in ("vauthors", "authors", "paşnav", "pêşnav"):
                if template.has(key):
                    v = str(template.get(key).value).strip()
                    break
            else:
                continue

            # Keep the part before the first comma, else the first word
            if "," in v:
                last = v[:v.index(",")]
            elif " " in v:
                last = v[:v.index(" ")]
            else:
                last = v
            last = ''.join(char for char in last if char not in punctuation)

            # Names without any Latin letter (empty, digits only, other
            # scripts) are not used.
            if not any(char in latin_alphabet for char in last):
                continue

            date = False
            for key in ("tarîx", "dîrok", "sal"):
                if template.has(key):
                    date = str(template.get(key).value)
                    break
            if not date:
                continue

            match = re.search(r'\d{4}', date)
            if not match:
                continue

            new_name = "{}{}".format(last, match[0])
            if new_name not in pretty.values() and new_name not in ref_names:
                pretty[old_name] = new_name

        if not pretty:
            return text

        if VERBOSE:
            print("pretty:", pretty)

        for tag in tags:
            k = str(tag.get("name").value)
            if k in pretty:
                # Address the attribute by name: "name" is not necessarily
                # the first attribute (e.g. <ref group="n" name=":0">).
                tag.get("name").value = pretty[k]

        return str(parsed)

    def fixSelfInterwiki(self, text: str) -> str:
        """
        Interwiki links to the site itself are displayed like local links.

        Remove their language code prefix.
        """
        if not self.talkpage and pywikibot.calledModuleName() != 'interwiki':
            interwikiR = re.compile(r'\[\[(?: *:)? *{} *: *([^\[\]\n]*)]]'
                                    .format(self.site.code))
            text = interwikiR.sub(r'[[\1]]', text)
        return text

    def fixMainCat(self, text: str) -> str:
        """Put the topic's main category first in the category list.

        The main category is looked up on Wikidata via ``get_main_cat``.
        Nothing is changed when the page has no categories, when it is
        already in the category named after itself, or when Wikidata does
        not provide a kuwiki main category. Only main-namespace pages are
        processed (any namespace in TESTING mode); disambiguation pages
        and pages flagged through ``self.gotara_zaravayan`` are skipped.

        :param text: wikitext of the page
        :return: wikitext with the main category (sort key ``' '``)
            inserted first, or the unchanged text
        """
        assert self.title is not None

        if self.namespace != 0 and not TESTING:
            return text

        if self.is_disambig or self.gotara_zaravayan:
            return text

        categories = textlib.getCategoryLinks(text, site=self.site)
        if not categories:
            return text

        main = pywikibot.Category(self.site, 'Category:' + self.title,
                                  sort_key=' ')
        if main in categories:
            return text

        # Get main categories from Wikidata
        maincats = self.get_main_cat(self.title)
        if not maincats:
            return text
        if VERBOSE:
            print(f"maincats: {maincats}")

        kuwiki_link = maincats.get('kuwiki')
        if not kuwiki_link:
            # Without a kuwiki main category there is nothing to add.
            # Rewriting the category block anyway could alter the text and
            # wrongly report an added main category.
            return text
        if VERBOSE:
            print(f"kuwiki Main cat found: {kuwiki_link}")

        main = pywikibot.Category(self.site, kuwiki_link, sort_key=' ')
        if main in categories:
            # Already present further down: move it to the front
            categories.remove(main)
        categories.insert(0, main)

        new_text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)

        # Ignore results that differ from the input in whitespace only
        if ''.join(text.split()) != ''.join(new_text.split()):
            return new_text
        return text

    def standardizeFooterTemplates(self, text: str) -> str:
        """
        Collect footer templates and re-append them in a fixed order.

        Stub templates, DEFAULTSORT (or ``Salê kat bike heke sal hebe``),
        parameterless authority-control templates and ``Koord``/``Coord``
        templates ending in ``display=title`` are cut out of the text and
        added back through ``textlib.add_text`` in this order: authority
        control, ``Şitil-...`` stubs, other stubs, coordinates, sort key.
        Only the first sort-key template found is kept.  Finally every run
        of blank lines in the whole text is collapsed to a single one.

        Pages outside the main namespace (unless TESTING is set),
        disambiguation pages and ``gotara_zaravayan`` pages are returned
        unchanged.

        :param text: wikitext to process
        :return: wikitext with the footer templates regrouped
        """
        if self.namespace != 0 and not TESTING:
            return text

        if self.is_disambig or self.gotara_zaravayan:
            return text

        def pull_out(pattern, source):
            """Return the matches of *pattern* and *source* without them."""
            return re.findall(pattern, source), re.sub(pattern, '', source)

        # Stub templates: "...-şitil", "şitil", "kurt", "stub", ...
        stubs, text = pull_out(
            r'{{\s*([^\}]+\-şitil|[Şş]iti?l|[Kk]urt|[Ss]tub|[Şş]itlek|[^\}]+\-şitil\-[^\}]+)\s*}}',
            text)

        # Stub templates written with the prefix form "Şitil-...".
        prefixed_stubs, text = pull_out(r'{{\s*([Şş]itil-[^\}]+)\s*}}', text)

        # Sort key templates; every hit is removed, the first one is kept.
        sort_hits, text = pull_out(
            r'{{\s*(DEFAULTSORT:[^}]+|Salê kat bike heke sal hebe)\s*}}',
            text)
        sort_key = sort_hits[0] if sort_hits else ""

        # Authority control in any of its spellings, without parameters.
        authority_hits, text = pull_out(
            r'\{\{\s*([kK]ontrola otorîtey?ê?|[aA]uthority control|Kontrola otorîte)\s*}}',
            text)

        # Coordinates shown in the title; keep only the argument part.
        coord_hits, text = pull_out(
            r'{{\s*([Kk]oord|[Cc]oord)\s*\|\s*([^}]+display\s*=\s*title)\s*}}',
            text)
        coord_args = [hit[1] for hit in coord_hits]

        footer = text

        if authority_hits:
            footer = textlib.add_text(footer, "\n{{Kontrola otorîteyê}}",
                                      site=self.site)

        if prefixed_stubs:
            block = '\n'.join('{{' + name + '}}' for name in prefixed_stubs)
            footer = textlib.add_text(footer, block, site=self.site)

        if stubs:
            block = '\n'.join('{{' + name + '}}' for name in stubs)
            footer = textlib.add_text(footer, block, site=self.site)

        if coord_args:
            block = '\n'.join('{{Koord|' + args + '}}' for args in coord_args)
            footer = textlib.add_text(footer, block, site=self.site)

        if sort_key:
            footer = textlib.add_text(footer, '\n' + '{{' + sort_key + '}}',
                                      site=self.site)

        # Collapse runs of empty lines (applies to the whole text).
        return re.sub(r'\n\n+', '\n\n', footer)

    def standardizePageFooter(self, text: str) -> str:
        """
        Standardize page footer.

        Makes sure that interwiki links and categories are put
        into the correct position and into the right order. This
        combines the old instances of standardizeInterwiki
        and standardizeCategories.

        The page footer consists of the following parts
        in that sequence:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki

        Categories are sorted by the Kurdish alphabet; a category named
        like the page itself (the main category) is kept in front of the
        sorted list.

        :param text: wikitext to process
        :return: wikitext with a standardized footer
        """
        assert self.title is not None

        categories = []
        interwiki_links = {}

        # Categories are not collected (and so not rewritten) on templates.
        if not self.template:
            categories = textlib.getCategoryLinks(text, site=self.site)

        subpage = False
        if not self.talkpage:

            if self.template:
                # A template title containing the documentation suffix
                # from moved_links is treated as a documentation subpage.
                try:
                    _, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                if loc is not None and loc in self.title:
                    subpage = True

            # get interwiki
            interwiki_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # remove interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        if self.namespace == 0 or TESTING:
            text = self.standardizeFooterTemplates(text)

        # add categories, main to top
        if categories:
            # Rank of every letter in the Kurdish alphabet.  The first
            # occurrence wins, which matches str.index() on this string.
            kurdish_alphabet = "abccçdeêfghiîjklmnopqrsştuûvwxyzABCCÇDEÊFGHIÎJKLMNOPQRSŞTUÛVWXYZ"
            rank = {}
            for position, letter in enumerate(kurdish_alphabet):
                rank.setdefault(letter, position)
            unknown = float('inf')  # anything outside the alphabet sorts last

            def kurdish_sort_key(category):
                """Map a category title to a tuple of alphabet positions."""
                return tuple(rank.get(c, unknown) for c in category.title())

            main = pywikibot.Category(self.site, 'Category:' + self.title,
                                      sort_key=' ')
            has_main = main in categories
            if has_main:
                categories.pop(categories.index(main))

            # Sort the remaining categories, then put the main category
            # back in front; sorting after the insert would re-sort it
            # into the list and lose the "main to top" placement.
            categories.sort(key=kurdish_sort_key)
            if has_main:
                categories.insert(0, main)

            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)

        # add interwiki
        if interwiki_links:
            text = textlib.replaceLanguageLinks(text, interwiki_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)

        return text

    def translateAndCapitalizeNamespaces(self, text: str) -> str:
        """Rewrite namespace prefixes in links to the preferred local name.

        For every namespace except the main one, links written with an
        alias or with a lower-cased first letter are rewritten to use the
        first name of that namespace.  For namespaces 2 and 3 only the
        English ``User`` / ``User talk`` prefixes are considered.

        .. versionchanged:: 7.4
           No longer expect a specific namespace alias for File:

        :param text: wikitext to process
        :return: wikitext with normalized namespace prefixes
        """
        # wiki links aren't parsed here.
        skip_tags = ['nowiki', 'comment', 'math', 'pre']

        def as_pattern(name):
            """Allow space/underscore and either case of the first letter."""
            name = name.replace(' ', '[ _]')
            return f'[{name[0]}{name[0].lower()}]' + name[1:]

        for ns in self.site.namespaces.values():
            if ns == 0:
                # the main (article) namespace has no prefix
                continue

            # Work on a copy so the namespace object itself stays intact.
            names = list(ns)
            final_ns = names[0]
            aliases = names[1:]
            if ns in (2, 3):
                # skip localized user namespace, maybe gender is used
                aliases = ['User' if ns == 2 else 'User talk']

            variants = [as_pattern(alias) for alias in aliases]
            variants.append(first_lower(final_ns))

            if final_ns and variants:
                link_pattern = (r'\[\[\s*({}) *:(?P<nameAndLabel>.*?)\]\]'
                                .format('|'.join(variants)))
                text = textlib.replaceExcept(
                    text,
                    link_pattern,
                    fr'[[{final_ns}:\g<nameAndLabel>]]',
                    skip_tags)
        return text

    def translateMagicWords(self, text: str) -> str:
        """Replace image option aliases in file links by their first name.

        The alias table is built lazily from ``site.getmagicwords`` the
        first time a file link with options is seen.  Aliases containing
        ``$1`` are never replaced.

        :param text: wikitext to process
        :return: wikitext with image magic words normalized
        """
        image_magicwords = (
            'img_thumbnail', 'img_left', 'img_center',
            'img_right', 'img_none', 'img_framed',
            'img_frameless', 'img_border', 'img_upright',
            'img_baseline', 'img_sub', 'img_super',
            'img_top', 'img_text_top', 'img_middle',
            'img_bottom', 'img_text_bottom',
        )
        # alias -> preferred name; the key False marks "nothing to replace"
        alias_map: Dict[Union[bool, str], Any] = {}

        def fill_alias_map() -> None:
            for word in image_magicwords:
                names = self.site.getmagicwords(word)
                if len(names) > 1:
                    preferred = names[0]
                    for alias in names[1:]:
                        if '$1' not in alias:
                            alias_map[alias] = preferred
            if not alias_map:
                alias_map[False] = True  # signal there is nothing to replace

        def localize(match: Match[str]) -> str:
            whole = match.group()
            if alias_map.get(False):
                return whole
            parts = whole.split('|')
            if len(parts) == 1:
                # a file link without any option
                return whole

            if not alias_map:
                fill_alias_map()

            # cut the closing ']]' off the last part; re-added below
            parts[-1] = parts[-1][:-2]
            options = '|'.join(alias_map.get(part.strip(), part)
                               for part in parts[1:])
            return '{}|{}]]'.format(parts[0], options)

        file_link = re.compile(
            FILE_LINK_REGEX % '|'.join(self.site.namespaces[6]),
            flags=re.X)
        skip_tags = ['comment', 'nowiki', 'pre', 'syntaxhighlight']
        return textlib.replaceExcept(text, file_link, localize, skip_tags)

    def cleanUpLinks(self, text: str) -> str:
        """Tidy up wikilinks found in a string.

        This function will:

        * Replace underscores with spaces
        * Move leading and trailing spaces out of the wikilink and into the
          surrounding text
        * Convert URL-encoded characters into Unicode-encoded characters
        * Move trailing characters out of the link and make the link without
          using a pipe, if possible
        * Capitalize the article title of the link, if appropriate

        Interwiki links and links outside the main namespace are only
        URL-decoded; links inside comment, math, nowiki and pre sections
        and on lines starting with a space are not touched at all.

        .. versionchanged:: 8.4
           Convert URL-encoded characters if a link is an interwiki link
           or different from main namespace.

        :param text: string to perform the clean-up on
        :return: text with tidied wikilinks
        """

        # helper function which works on one link and either returns it
        # unmodified, or returns a replacement.
        def handleOneLink(match: Match[str]) -> str:
            # Convert URL-encoded characters to str
            titleWithSection = url2string(match['titleWithSection'],
                                          encodings=self.site.encodings())
            label = match['label']
            trailingChars = match['linktrail']
            newline = match['newline']
            # entire link but convert URL-encoded text
            oldlink = url2string(match.group(),
                                 encodings=self.site.encodings())

            # Interwiki links are returned URL-decoded but otherwise as is.
            is_interwiki = self.site.isInterwikiLink(titleWithSection)
            if is_interwiki:
                return oldlink

            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            # We only work on namespace 0 because pipes and linktrails work
            # differently for images and categories.
            page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
            try:
                in_main_namespace = page.namespace() == 0
            except InvalidTitleError:
                # An unparsable title is treated like a foreign namespace.
                in_main_namespace = False
            if not in_main_namespace:
                return oldlink

            # Replace underlines by spaces, also multiple underlines
            titleWithSection = re.sub('_+', ' ', titleWithSection)
            # Remove double spaces
            titleWithSection = re.sub('  +', ' ', titleWithSection)
            # Remove unnecessary leading spaces from title,
            # but remember if we did this because we eventually want
            # to re-add it outside of the link later.
            titleLength = len(titleWithSection)
            titleWithSection = titleWithSection.lstrip()
            hadLeadingSpaces = len(titleWithSection) != titleLength
            hadTrailingSpaces = False
            # Remove unnecessary trailing spaces from title,
            # but remember if we did this because it may affect
            # the linktrail and because we eventually want to
            # re-add it outside of the link later.
            if not trailingChars:
                titleLength = len(titleWithSection)
                titleWithSection = titleWithSection.rstrip()
                hadTrailingSpaces = len(titleWithSection) != titleLength

            if not titleWithSection:
                # just skip empty links.
                # Note: this returns the raw match, not the URL-decoded
                # ``oldlink`` used by the other early returns.
                return match.group()

            # Remove unnecessary initial and final spaces from label.
            # Please note that some editors prefer spaces around pipes.
            # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
            if label is not None:
                # Remove unnecessary leading spaces from label,
                # but remember if we did this because we want
                # to re-add it outside of the link later.
                # NOTE(review): this assignment replaces the flag computed
                # from the title above, so for piped links only the label's
                # leading spaces decide whether a space is re-added.
                labelLength = len(label)
                label = label.lstrip()
                hadLeadingSpaces = len(label) != labelLength
                # Remove unnecessary trailing spaces from label,
                # but remember if we did this because it affects
                # the linktrail.
                if not trailingChars:
                    labelLength = len(label)
                    label = label.rstrip()
                    hadTrailingSpaces = len(label) != labelLength
            else:
                label = titleWithSection
            # The linktrail is displayed as part of the label, so compare
            # and rebuild the link with it appended.
            if trailingChars:
                label += trailingChars

            # On first-letter wikis the case of the first character is
            # ignored when comparing title and label.
            if self.site.siteinfo['case'] == 'first-letter':
                firstcase_title = first_lower(titleWithSection)
                firstcase_label = first_lower(label)
            else:
                firstcase_title = titleWithSection
                firstcase_label = label

            if firstcase_label == firstcase_title:
                # Label and title agree: no pipe needed.
                newLink = f'[[{label}]]'
            # Check if we can create a link with trailing characters
            # instead of a pipelink
            elif (firstcase_label.startswith(firstcase_title)
                  and trailR.sub('', label[len(titleWithSection):]) == ''):
                newLink = '[[{}]]{}'.format(label[:len(titleWithSection)],
                                            label[len(titleWithSection):])

            else:
                # Try to capitalize the first letter of the title.
                # Not useful for languages that don't capitalize nouns.
                # TODO: Add a configuration variable for each site,
                # which determines if the link target is written in
                # uppercase
                if self.site.sitename == 'wikipedia:de':
                    titleWithSection = first_upper(titleWithSection)
                newLink = f'[[{titleWithSection}|{label}]]'
            # re-add spaces that were pulled out of the link.
            # Examples:
            #   text[[ title ]]text        -> text [[title]] text
            #   text[[ title | name ]]text -> text [[title|name]] text
            #   text[[ title |name]]text   -> text[[title|name]]text
            #   text[[title| name]]text    -> text [[title|name]]text
            if hadLeadingSpaces and not newline:
                newLink = ' ' + newLink
            if hadTrailingSpaces:
                newLink += ' '
            # Newlines captured in front of the link are put back unchanged.
            if newline:
                newLink = newline + newLink
            return newLink

        # Pattern for the characters that count as a linktrail on this site.
        trailR = re.compile(self.site.linktrail())
        # The regular expression which finds links. Results consist of four groups:
        # group <newline> depends whether the links starts with a new line.
        # group <titleWithSection> is the page title and section, that is,
        # everything before | or ]. It'll include the # to make life easier for us.
        # group <label> is the alternative link title between | and ].
        # group <linktrail> is the link trail after ]] which are part of the word.
        # note that the definition of 'letter' varies from language to language.
        linkR = re.compile(
            r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)'
            r'(\|(?P<label>[^]|]*))?]](?P<linktrail>'
            + self.site.linktrail() + ')')

        text = textlib.replaceExcept(text, linkR, handleOneLink,
                                     ['comment', 'math', 'nowiki', 'pre',
                                      'startspace'])
        return text

    def resolveHtmlEntities(self, text: str) -> str:
        """Replace HTML entities by the characters they stand for.

        A fixed set of code points is left encoded; on template pages the
        space and the colon are left encoded as well.  Comments and
        syntaxhighlight sections are skipped.

        :param text: wikitext to process
        :return: wikitext with HTML entities resolved
        """
        protected = [
            38,    # ampersand (&amp;)
            39,    # apostrophe (&#39;), per T26093
            60,    # less than (&lt;)
            62,    # greater than (&gt;)
            91,    # opening square bracket ([)
                   # - sometimes used intentionally inside links
            93,    # closing square bracket (])
                   # - used intentionally inside links
            124,   # vertical bar (|)
                   # - used intentionally in navigation bar templates on w:de
            160,   # non-breaking space (&nbsp;)
                   # - not supported by Firefox textareas
            173,   # soft hyphen (&shy;) - enable editing
            8206,  # left-to-right mark (&lrm;)
            8207,  # right-to-left mark (&rlm;)
        ]
        if self.template:
            # space ( ) and colon (:)
            protected.extend([32, 58])

        # TODO: T254350 - what other extension tags should be avoided?
        # (graph, math, score, timeline, etc.)
        return pywikibot.html2unicode(
            text, ignore=protected, exceptions=['comment', 'syntaxhighlight'])

    def removeUselessSpaces(self, text: str) -> str:
        """Collapse repeated spaces, trailing spaces and blank-line runs.

        Two substitutions are applied in order: runs of tabs/spaces that
        are followed by a space or the end of a line are dropped, then
        two or more consecutive newlines are reduced to exactly two.

        :param text: wikitext to process
        :return: wikitext without the superfluous whitespace
        """
        skip_tags = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight',
                     'startspace', 'table', 'template', 'timeline']
        # A third rule (r'\n +' -> r'\n', stripping spaces at the start of
        # a line) exists in the original only as disabled code.
        substitutions = (
            (r'(?m)[\t ]+( |$)', r'\1'),
            (r'\n\n\n*', r'\n\n'),
        )
        for pattern, replacement in substitutions:
            text = textlib.replaceExcept(text, pattern, replacement,
                                         skip_tags, site=self.site)
        return text

    def removeNonBreakingSpaceBeforePercent(self, text: str) -> str:
        """
        Replace a non-breaking space entity before a percent sign.

        A ``&nbsp;``, ``&#160;`` or ``&#xA0;`` between a digit and ``%``
        is turned into a plain space.  Newer MediaWiki versions place the
        non-breaking space in front of a percent sign automatically, so it
        no longer has to be written by hand.  Timeline sections are skipped.

        :param text: wikitext to process
        :return: wikitext with the entity replaced
        """
        nbsp_before_percent = r'(\d)&(?:nbsp|#160|#x[Aa]0);%'
        plain_space = r'\1 %'
        return textlib.replaceExcept(
            text, nbsp_before_percent, plain_space, ['timeline'])

    def cleanUpSectionHeaders(self, text: str) -> str:
        """
        Put exactly one space between the equal signs and the title.

        Example::

            ==Section title==

        becomes::

            == Section title ==

        Only headers followed by a line break are rewritten; whitespace
        after the closing equal signs is dropped.  Comment, math, nowiki
        and pre sections are skipped.

        .. note:: This space is recommended in the syntax help on the
           English and German Wikipedias. It is not wanted on Lojban and
           English Wiktionaries (:phab:`T168399`, :phab:`T169064`) and
           it might be that it is not wanted on other wikis. If there
           are any complaints, please file a bug report.

        :param text: wikitext to process
        :return: wikitext with normalized section headers
        """
        header = r'(?m)^(={1,6})[ \t]*(?P<title>.*[^\s=])[ \t]*\1[ \t]*\r?\n'
        spaced_header = r'\1 \g<title> \1\n'
        skip_tags = ['comment', 'math', 'nowiki', 'pre']
        return textlib.replaceExcept(text, header, spaced_header, skip_tags)

    def putSpacesInLists(self, text: str) -> str:
        """
        Insert a space between list markers (``*``, ``#``) and the item.

        Template pages are returned unchanged.  Redirect lines and the
        usual non-wikitext sections are skipped.

        .. note:: This space is recommended in the syntax help on the
           English, German and French Wikipedias. It might be that it
           is not wanted on other wikis. If there are any complaints,
           please file a bug report.

        :param text: wikitext to process
        :return: wikitext with spaced list items
        """
        if self.template:
            return text

        skip = ['comment', 'math', 'nowiki', 'pre',
                'syntaxhighlight', 'template', 'timeline',
                self.site.redirect_regex]
        # <bullet>: the marker run, <char>: start of the item text
        list_item = (r'(?m)'
                     r'^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)')
        return textlib.replaceExcept(
            text, list_item, r'\g<bullet> \g<char>', skip)

    # from fixes.py
    def fixSyntaxSave(self, text: str) -> str:
        """Convert weblinks to wikilink, fix link syntax.

        Three kinds of repair are applied, each skipping comment, math,
        nowiki, pre, syntaxhighlight sections and lines starting with a
        space:

        * bracketed URLs that point to this wiki become wikilinks
        * other URLs wrapped in double brackets become external links
        * a pipe between an external URL and its label becomes a space

        :param text: wikitext to process
        :return: wikitext with the link syntax repaired
        """
        # Targets in namespaces 6 and 14 get a leading colon in the
        # resulting wikilink.  Compiled once instead of once per match.
        colon_ns = re.compile(r'(?:{}):'.format(
            '|'.join((*self.site.namespaces[6],
                      *self.site.namespaces[14]))))

        def replace_link(match: Match[str]) -> str:
            """Create a string to replace a single link."""
            link = match['link']
            replacement = '[['
            if colon_ns.match(link):
                replacement += ':'

            if link.endswith('/'):
                # Diagnostic output only on request, like elsewhere in
                # this file; it used to be printed unconditionally.
                if VERBOSE:
                    print('url ends with /')
                # drop the single trailing slash from the page title
                link = link[:-1]

            replacement += link
            if match['title']:
                replacement += '|' + match['title']

            return replacement + ']]'

        exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
                      'syntaxhighlight']

        # link to the wiki working on
        # Only use suffixes for article paths
        for suffix in self.site._interwiki_urls(True):
            http_url = self.site.base_url(suffix, 'http')
            if self.site.protocol() == 'http':
                https_url = None
            else:
                https_url = self.site.base_url(suffix, 'https')

            # compare strings without the protocol, if they are empty support
            # also no prefix (//en.wikipedia.org/…)
            http = urlparse(http_url)
            https = urlparse(https_url)
            if https_url is not None and http.netloc == https.netloc:
                urls = ['(?:https?:)?'
                        + re.escape(urlunparse(('', *http[1:])))]
            else:
                urls = [re.escape(url) for url in (http_url, https_url)
                        if url is not None]

            for url in urls:
                # unescape {} placeholder
                url = url.replace(r'\{\}', '{title}')

                # Only include links which don't include the separator
                # as the wikilink won't support additional parameters
                separator = '?&' if '?' in suffix else '?'

                # Match first a non space in the title to prevent that multiple
                # spaces at the end without title will be matched by it
                title_regex = (r'(?P<link>[^{sep}]+?)'
                               r'(\s+(?P<title>[^\s].*?))'
                               .format(sep=separator))
                url_regex = fr'\[\[?{url}?\s*\]\]?'
                text = textlib.replaceExcept(
                    text,
                    url_regex.format(title=title_regex),
                    replace_link, exceptions, site=self.site)

        # external link in/starting with double brackets
        text = textlib.replaceExcept(
            text,
            r'\[\[(?P<url>https?://[^\]]+?)\]\]?',
            r'[\g<url>]', exceptions, site=self.site)

        # external link and description separated by a pipe, with
        # whitespace in front of the pipe, so that it is clear that
        # the dash is not a legitimate part of the URL.
        text = textlib.replaceExcept(
            text,
            r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
            r'[\g<url> \g<label>]', exceptions)

        # dash in external link, where the correct end of the URL can
        # be detected from the file extension. It is very unlikely that
        # this will cause mistakes.
        extensions = [fr'\.{ext}'
                      for ext in ['pdf', 'html?', 'php', 'aspx?', 'jsp']]
        text = textlib.replaceExcept(
            text,
            r'\[(?P<url>https?://[^\|\] ]+?(' + '|'.join(extensions) + r')) *'
            r'\| *(?P<label>[^\|\]]+?)\]',
            r'[\g<url> \g<label>]', exceptions)
        return text

    def fixHtml(self, text: str) -> str:
        """Replace HTML markup with the equivalent wikitext markup.

        Handles bold, italic, horizontal rules and headings that stand
        alone on a line.  All patterns are case-insensitive.

        :param text: wikitext to process
        :return: wikitext with HTML markup converted
        """

        def heading_markup(match: Match[str]) -> str:
            """Build a wikitext heading from a matched heading element."""
            marks = '=' * int(match[1])
            return '{0} {1} {0}'.format(marks, match[2])

        # Keep in mind that MediaWiki automatically converts <br> to <br />
        skip_tags = ['comment', 'math', 'nowiki', 'pre', 'startspace',
                     'syntaxhighlight']

        bold = r'(?i)<(b|strong)>(.*?)</\1>'
        text = textlib.replaceExcept(text, bold, r"'''\2'''", skip_tags,
                                     site=self.site)

        italic = r'(?i)<(i|em)>(.*?)</\1>'
        text = textlib.replaceExcept(text, italic, r"''\2''", skip_tags,
                                     site=self.site)

        # horizontal line without attributes in a single line
        plain_rule = r'(?i)([\r\n])<hr[ /]*>([\r\n])'
        text = textlib.replaceExcept(text, plain_rule, r'\1----\2', skip_tags)

        # horizontal line with attributes; can't be done with wiki syntax
        # so we only make it XHTML compliant
        rule_with_attrs = r'(?i)<hr ([^>/]+?)>'
        text = textlib.replaceExcept(text, rule_with_attrs, r'<hr \1 />',
                                     skip_tags)

        # a header where only spaces are in the same line
        lone_heading = (
            r'(?i)(?<=[\r\n]) *<h([1-7])> *([^<]+?) *</h\1> *(?=[\r\n])')
        text = textlib.replaceExcept(text, lone_heading, heading_markup,
                                     skip_tags)

        # TODO: maybe we can make the bot replace <p> tags with \r\n's.
        return text

    def fixReferences(self, text: str) -> str:
        """Fix references tags.

        Normalizes the spacing and quoting of ``<ref name=...>`` tags,
        removes empty ref tags and removes spaces between two adjacent
        references.  Only pages in the main namespace are processed unless
        TESTING is set.  Comment, math, nowiki, pre and syntaxhighlight
        sections and lines starting with a space are never modified.

        :param text: wikitext to process
        :return: wikitext with cleaned-up reference tags
        """
        # See also
        # https://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFixer.pm
        if self.namespace != 0 and not TESTING:
            return text
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'syntaxhighlight',
                      'startspace']

        # it should be name = " or name=" NOT name   ="
        # Goes through replaceExcept like every other rule here, so that
        # ref-like text inside <nowiki>, <pre> or comments is left alone;
        # a bare re.sub would rewrite those sections too.
        text = textlib.replaceExcept(text,
                                     r'(?i)<ref +name(= *| *=)"',
                                     r'<ref name="', exceptions)

        # Replace <ref name=Penny1p16> with <ref name="Penny1p16">, only if not already quoted
        text = textlib.replaceExcept(text,
                                     r'(?i)<ref +name *= *([^\"\/ >]+) *>',
                                     r'<ref name="\1">', exceptions)

        # Replace <ref name=Penny1p16 /> with <ref name="Penny1p16" />, only if not already quoted
        text = textlib.replaceExcept(text,
                                     r'(?i)<ref +name *= *([^\" >]+) */>',
                                     r'<ref name="\1"/>', exceptions)

        # remove empty <ref/>-tag
        text = textlib.replaceExcept(text,
                                     r'(?i)(<ref\s*/>|<ref *>\s*</ref>)',
                                     r'', exceptions)
        # no spaces between a closing and the next opening ref tag
        text = textlib.replaceExcept(text,
                                     r'</ref>[ ]*<ref>',
                                     r'</ref><ref>', exceptions)
        return text

    def fixStyle(self, text: str) -> str:
        """Replace ``prettytable`` by ``wikitable`` inside class attributes.

        Only pages in the main namespace are processed unless TESTING is
        set.

        :param text: wikitext to process
        :return: wikitext using the wikitable class
        """
        if self.namespace != 0 and not TESTING:
            return text

        skip_tags = ['comment', 'math', 'nowiki', 'pre', 'startspace',
                     'syntaxhighlight']
        # Groups 1 and 2 keep whatever else is inside the class attribute.
        old_class = r'(class="[^"]*)prettytable([^"]*")'
        new_class = r'\1wikitable\2'
        return textlib.replaceExcept(text, old_class, new_class, skip_tags)

    def fixTypo(self, text: str) -> str:
        """Fix the notation of some units.

        ``ccm`` after a number becomes ``cm³`` and a degree or ordinal
        sign before ``C``/``F`` becomes a proper degree sign; a number in
        front of it is joined to the unit with ``&nbsp;``.  Only pages in
        the main namespace are processed unless TESTING is set.

        :param text: wikitext to process
        :return: wikitext with corrected unit notation
        """
        if self.namespace != 0 and not TESTING:
            return text

        base_exceptions: List[Union[str, Pattern[str]]] = [
            'comment',
            'gallery',
            'hyperlink',
            'interwiki',
            'link',
            'nowiki',
            'math',
            'pre',
            'startspace',
            'syntaxhighlight',
        ]

        # change <number> ccm -> <number> cm³
        text = textlib.replaceExcept(text, r'(\d)\s*(?:&nbsp;)?ccm',
                                     r'\1&nbsp;cm³', base_exceptions,
                                     site=self.site)

        # Solve wrong Nº sign with °C or °F
        # additional exception requested on fr-wiki for this stuff:
        # text inside « » quotes is skipped for the degree rules.
        degree_exceptions = base_exceptions + [re.compile('«.*?»')]
        text = textlib.replaceExcept(text, r'(\d)\s*(?:&nbsp;)?[º°]([CF])',
                                     r'\1&nbsp;°\2', degree_exceptions,
                                     site=self.site)
        text = textlib.replaceExcept(text, 'º([CF])', r'°\1',
                                     degree_exceptions,
                                     site=self.site)
        return text

    def fix_ISBN(self, text: str) -> str:
        """Hyphenate ISBN numbers.

        The work is delegated to ``_reformat_ISBNs``; strict mode is used
        unless ``self.ignore`` equals ``CANCEL.MATCH``.

        :param text: wikitext to process
        :return: wikitext with reformatted ISBNs
        """
        strict = self.ignore != CANCEL.MATCH
        return _reformat_ISBNs(text, strict=strict)

    def fixLead(self, text: str) -> str:
        """Reorder the templates found in the lead section of a page.

        Templates in the lead (the first section returned by
        ``get_sections(include_lead=True)``) are pulled out and put back at
        the very top, in this order:

        1. templates whose name is in ``self.sernav_templates``
        2. members of the category "Şablon (cudakirin)"
        3. members of the category "Şablonên hişyarde"
        4. members of the category "Şablonên hişyarde ji bo gotaran",
           wrapped in one "Çend problem" template when such a wrapper was
           already present or when more than two of them were found
        5. templates whose name starts with "Agahîdank", except names that
           also occur nested inside another "Agahîdank" template

        followed by whatever is left of the lead. The content of every
        existing "Çend problem" wrapper that has a first parameter is kept
        and merged into the new wrapper.

        Pages outside namespace 0 are returned unchanged unless ``TESTING``
        is set.

        :param text: wikitext of the whole page
        :return: the wikitext with the lead rearranged and leading
            whitespace stripped, or the untouched input
        """
        if self.namespace != 0 and not TESTING:
            return text

        cudakirin_templates = mytools.get_cat_members(self.site, "Şablon (cudakirin)", 10)
        hisyarde_templates = mytools.get_cat_members(self.site, "Şablonên hişyarde", 10)
        # The wrapper itself must never be handled as a cleanup template.
        # Filter into a new list instead of calling .remove() so the object
        # handed back by the helper is left untouched.
        cleanup_templates = [
            name
            for name in mytools.get_cat_members(self.site, "Şablonên hişyarde ji bo gotaran", 10)
            if name != "Çend problem"
        ]

        wikicode = mwparserfromhell.parse(text)
        sections = wikicode.get_sections(include_lead=True)

        lead_section = sections[0]
        if VERBOSE:
            print("lead_section:\n", lead_section)

        # Names of Agahîdank templates that sit inside a parameter of another
        # Agahîdank template; templates with these names stay where they are.
        child_agahidanks = []
        for parent_templ in lead_section.filter_templates(recursive=True):
            if not ucfirst(parent_templ.name).startswith("Agahîdank"):
                continue
            for param in parent_templ.params:
                for tpl in param.value.filter_templates():
                    tpl_name = ucfirst(tpl.name)
                    if tpl_name.startswith("Agahîdank"):
                        child_agahidanks.append(tpl_name)

        # Unwrap every existing "Çend problem" that has a first parameter.
        # All bodies are collected so that a second wrapper does not discard
        # the content of the first one.
        problem_bodies = []
        for template in lead_section.filter_templates():
            if ucfirst(template.name) != "Çend problem" or not template.has(1):
                continue
            try:
                lead_section.remove(template)
            except ValueError:
                # Nested in a wrapper that was removed a moment ago; its
                # text is already part of that wrapper's body.
                continue
            problem_bodies.append(str(template.get(1).value).strip())
        existing_problems = "\n".join(body for body in problem_bodies if body)

        removed_cleanup_templates = []
        removed_hisyarde_templates = []
        removed_agahidank_templates = []
        removed_sernav_templates = []
        removed_cuda_templates = []

        for template in lead_section.filter_templates():
            template_name = ucfirst(template.name)
            # Exactly one bucket per template: removing the same node twice
            # would raise ValueError.
            if template_name in cleanup_templates:
                bucket = removed_cleanup_templates
            elif template_name in hisyarde_templates:
                bucket = removed_hisyarde_templates
            elif template_name in self.sernav_templates:
                bucket = removed_sernav_templates
            elif template_name in cudakirin_templates:
                bucket = removed_cuda_templates
            elif template_name.startswith("Agahîdank") and template_name not in child_agahidanks:
                bucket = removed_agahidank_templates
            else:
                continue

            try:
                lead_section.remove(template)
            except ValueError:
                # The template lived inside one that has already been taken
                # out, so it travels with its parent.
                continue
            bucket.append(template)

        def as_lines(templates) -> str:
            # One template per line, in the order they were found.
            return "\n".join(str(template) for template in templates)

        # Each group is prepended in turn, so the group handled last ends up
        # first in the final text.
        new_lead_section = str(lead_section).lstrip()

        if removed_agahidank_templates:
            new_lead_section = as_lines(removed_agahidank_templates) + "\n" + new_lead_section

        readding_cleanup = ""
        if existing_problems:
            readding_cleanup += existing_problems + "\n"
        if removed_cleanup_templates:
            readding_cleanup += as_lines(removed_cleanup_templates)
        if readding_cleanup.strip():
            if existing_problems or len(removed_cleanup_templates) > 2:
                wrapper = mwparserfromhell.nodes.Template("Çend problem")
                wrapper.add(1, "\n" + readding_cleanup + "\n")
                readding_cleanup = str(wrapper)
            new_lead_section = readding_cleanup + "\n" + new_lead_section

        if removed_hisyarde_templates:
            new_lead_section = as_lines(removed_hisyarde_templates) + "\n" + new_lead_section

        if removed_cuda_templates:
            new_lead_section = as_lines(removed_cuda_templates) + "\n" + new_lead_section

        if removed_sernav_templates:
            new_lead_section = as_lines(removed_sernav_templates) + "\n" + new_lead_section

        # Replace the lead section in the original wikicode object
        wikicode.replace(sections[0], new_lead_section)

        return str(wikicode).lstrip()

    def addOrphanTag(self, text: str) -> str:
        """Put a dated ``{{Sêwî}}`` tag at the top of the page text.

        The text is returned unchanged when the page is outside namespace 0,
        when ``self.is_disambig`` or ``self.gotara_zaravayan`` is set, when
        ``self.contains_sewi_cat`` is true, or when ``self.is_sewi`` is false.

        :param text: wikitext of the page
        :return: the left-stripped wikitext prefixed with the tag, or the
            untouched input
        """
        leave_alone = (
            self.namespace != 0
            or self.is_disambig
            or self.gotara_zaravayan
            or self.contains_sewi_cat
            or not self.is_sewi
        )
        if leave_alone:
            return text

        body = text.lstrip()
        tag_line = "".join(["{{Sêwî|tarîx=", self.tarix, "}}\n"])
        return tag_line + body

    def removeOrphanTag(self, text: str) -> str:
        """Strip the "Sêwî" template from a page that is no longer flagged.

        The template is removed through ``mytools.remove_template`` only when
        the page is in namespace 0, neither ``self.is_disambig`` nor
        ``self.gotara_zaravayan`` is set, ``self.contains_sewi_cat`` is true
        and ``self.is_sewi`` is false. In every other case the text is
        returned unchanged.

        :param text: wikitext of the page
        :return: the wikitext without the template, or the untouched input
        """
        keep_as_is = (
            self.namespace != 0
            or self.is_disambig
            or self.gotara_zaravayan
            or not self.contains_sewi_cat
            or self.is_sewi
        )
        if keep_as_is:
            return text

        return mytools.remove_template(text, "Sêwî")

    def addStubTag(self, text: str) -> str:
        """Append ``{{Şitil}}`` to the page text via ``textlib.add_text``.

        Nothing is added when the page is outside namespace 0, when its title
        starts with "Lîste", when ``self.is_liste``, ``self.is_disambig``,
        ``self.gotara_zaravayan`` or ``self.contains_sitil_cat`` is set, or
        when ``self.is_sitil`` is ``'lîste'`` or ``False``.

        :param text: wikitext of the page
        :return: the wikitext with the tag added, or the untouched input
        """
        excluded = (
            self.namespace != 0
            or self.title.startswith("Lîste")
            or self.is_liste
            or self.is_disambig
            or self.gotara_zaravayan
            or self.contains_sitil_cat
        )
        if excluded:
            return text

        wants_stub = self.is_sitil != 'lîste' and self.is_sitil is not False
        if not wants_stub:
            return text

        return textlib.add_text(text, "{{Şitil}}")

    def removeStubTag(self, text: str) -> str:
        """Delete parameterless stub templates from the page text.

        The text is returned unchanged when the page is outside namespace 0,
        when ``self.contains_sitil_cat`` is false, when the title starts with
        "Lîste", or when ``self.is_sitil`` is ``'lîste'`` or ``True``.

        When at least one template was deleted,
        ``mytools.remove_sitil_class`` is called with ``self.current_page``.

        :param text: wikitext of the page
        :return: the wikitext without the matched templates
        """
        untouched = (
            self.namespace != 0
            or not self.contains_sitil_cat
            or self.title.startswith("Lîste")
            or self.is_sitil == 'lîste'
            or self.is_sitil is True
        )
        if untouched:
            return text

        # Applied one after the other: first the "...-şitil" family and the
        # plain stub names, then the "Şitil-..." family.
        # NOTE(review): `[^\}]+` also matches `|` and `{`, so a template whose
        # last parameter ends in "-şitil" would be matched as well - confirm
        # that this is acceptable.
        stub_patterns = (
            r'{{\s*([^\}]+\-şitil|[Şş]iti?l|[Kk]urt|[Ss]tub|[Şş]itlek|[^\}]+\-şitil\-[^\}]+)\s*}}',
            r'{{\s*([Şş]itil-[^\}]+)\s*}}',
        )
        new_text = text
        for pattern in stub_patterns:
            new_text = re.sub(pattern, '', new_text)

        if new_text != text:
            mytools.remove_sitil_class(self.current_page)

        return new_text

    def addUncatTag(self, text: str) -> str:
        """Put a dated ``{{Bêkategorî}}`` tag at the top of the page text.

        The text is returned unchanged when

        * the page is outside namespaces 0 and 14,
        * the page is in namespace 0 and ``self.is_disambig`` or
          ``self.gotara_zaravayan`` is set,
        * ``mytools.is_template_in_page`` already finds
          ``mytools.UNCAT_TEMPL`` in the text,
        * ``self.is_bekategori`` is ``'idk'``, ``False`` or ``None``, or
        * the text itself contains at least one category link.

        :param text: wikitext of the page
        :return: the wikitext prefixed with the tag, or the untouched input
        """
        if self.namespace not in [0, 14]:
            return text

        if self.namespace == 0 and (self.is_disambig or self.gotara_zaravayan):
            return text

        contains_bekat_templ = mytools.is_template_in_page(text, mytools.UNCAT_TEMPL)

        if contains_bekat_templ or self.is_bekategori in ['idk', False, None]:
            return text

        # Debug output only, gated like the other diagnostics in this file.
        if VERBOSE:
            print("self.is_bekategori", self.is_bekategori)

        category_links = textlib.getCategoryLinks(text, site=self.site)

        # For categorize.py (original note): text that already carries
        # category links is never tagged.
        if category_links:
            return text

        text = "{{Bêkategorî|tarîx=" + self.tarix + "}}\n" + text
        return text

    def removeUncatTag(self, text: str) -> str:
        """Strip the ``mytools.UNCAT_TEMPL`` template from the page text.

        The template is removed through ``mytools.remove_template`` only when
        the page is in namespace 0 or 14, the template is present in the text
        and ``self.is_bekategori`` is not ``'idk'``, ``True`` or ``None``.
        In every other case the text is returned unchanged.

        :param text: wikitext of the page
        :return: the wikitext without the template, or the untouched input
        """
        # Cheapest test first: pages in other namespaces are never scanned
        # for the template, the same order addUncatTag uses.
        if self.namespace not in [0, 14]:
            return text

        if not mytools.is_template_in_page(text, mytools.UNCAT_TEMPL):
            return text

        if self.is_bekategori in ['idk', True, None]:
            return text

        return mytools.remove_template(text, mytools.UNCAT_TEMPL)

    def fixApostSign(self, text: str) -> str:
        """Replace the HTML entity ``&#39;`` with a plain apostrophe.

        Only pages in namespace 0 are processed. The replacement is done by
        ``textlib.replaceExcept`` with the tag names listed below passed as
        its exceptions argument.

        :param text: wikitext of the page
        :return: the wikitext with the entity replaced, or the untouched
            input for other namespaces
        """
        if self.namespace != 0:
            return text

        protected = [
            'comment', 'math', 'nowiki', 'pre', 'syntaxhighlight',
            'startspace', 'table', 'ref', 'timeline',
        ]
        return textlib.replaceExcept(text, r"&#39;", r"'", protected, site=self.site)