Bikarhêner:Balyozxane/skrîpt/py/vref.py

# Fork of [[en:User:Qwerfjkl/VEref.py]]
# Fork of [[User:Psiĥedelisto/VisualEditor ref namer.py]]
#
# (C) Pywikibot team, 2006-2021
#
# Distributed under the terms of the MIT license.
#
import mwparserfromhell
from mwparserfromhell.wikicode import Wikicode
import string
import re
import sys
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ConfigParserBot,
    ExistingPageBot,
    NoRedirectPageBot,
    SingleSiteBot,
)
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816

class BasicBot(
    # Refer pywikobot.bot for generic bot classes
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    # CurrentPageBot,  # Sets 'current_page'. Process it in treat_page method.
    #                  # Not needed here because we have subclasses
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
    NoRedirectPageBot,  # CurrentPageBot which only treats non-redirects
    AutomaticTWSummaryBot,  # Automatically defines summary; needs summary_key
):
    """
    An incomplete sample bot.
    :ivar summary_key: Edit summary message key. The message that should be
        used is placed on /i18n subdirectory. The file containing these
        messages should have the same name as the caller script (i.e. basic.py
        in this case). Use summary_key to set a default edit summary message.
    :type summary_key: str
    """
    summary_key = 'basic-changing'
    update_options = {
        'replace': False,  # delete old text and write the new text
        'summary': "Bot: Navên referansan ên [[en:WP:VE|VisualEditor]]ê hat sererastkirin",  # your own bot summary
        'text': None,  # add this text from option. 'Test' is default
        'top': False,  # append text on top of the page
    }
    def treat_page(self) -> None:
        """Load the given page, do some changes, and save it."""
        text = self.current_page.text
        # Taken from [[User:Psiĥedelisto/VisualEditor ref namer.py]]
        skip = True
        parsed = mwparserfromhell.parse( text )
        tags = list(filter(None, [t if t.has("name") else None for t in parsed.ifilter(forcetype=mwparserfromhell.wikicode.Tag, matches="<\\s*ref\\s*", recursive=True)]))

        refs = list(filter(lambda s: re.search("^:\d+$", str(s.get("name").value)) and not re.search("/>$", str(s)), tags))

        pretty = dict()

        for ref in refs:
            template = ref.contents.get(0)
            if not isinstance(template, mwparserfromhell.nodes.Template):  # Check if template is a Template object
                continue
            if template.has("vauthors"):
                v = str(template.get("vauthors").value)
            elif template.has("authors"):
                v = str(template.get("authors").value)
            elif template.has("last"):
                v = str(template.get("last").value)
            elif template.has("first"):
                v = str(template.get("first").value)
            else:
                continue

            v = v.strip()

            if "," in v:
                last = v[:v.index(",")]
            elif " " in v:
                last = v[:v.index(" ")]
            else:
                last = v

            punctuation = set(string.punctuation)

            # Strip punctuation characters from the last word directly
            last = ''.join([char for char in last if char not in punctuation])

            if re.match(r'^[0-9\-.,]+$', last):
                last = False
            else:
                # Check if the last name contains Latin alphabet characters
                latin_alphabet = set(string.ascii_letters)
                if not any(char in latin_alphabet for char in last):
                    last = False

            date = False
            if template.has("date"):
                date = str(template.get("date").value)
            elif template.has("year"):
                date = str(template.get("year").value)

            if date and last:
                date = re.search("\d{4}", date)[0]

                pretty[str(ref.get("name").value)] = "{}{}".format(last, date)

        for tag in parsed.ifilter(forcetype=mwparserfromhell.wikicode.Tag, matches="<\\s*ref\\s*", recursive=True):
            if not tag.has("name"): continue
            k = str(tag.get("name").value)
            if k in pretty:
                tag.attributes[0].value = pretty[k]

                skip = False # Don't skip if there are non-cosmetic changes

        # Inside the loop
        for template in parsed.ifilter_templates():
            tn = template.name.strip()  # Strip whitespace from the template name
            new_name = ""
            if tn.lower() in ["rp", "ill", "bz", "pz", "lang", "ziman", "respell", "abbr"] or tn.lower().startswith(
                    ("lang-", "ziman-", "bi-")):
                new_name = tn[0].lower() + tn[1:]
            else:
                new_name = tn[0].upper() + tn[1:]

            # Preserve whitespace characters
            whitespace_index = template.name.find(tn) + len(tn)
            whitespace = template.name[whitespace_index:]
            new_name += whitespace

            template.name = new_name

            print(tn, "⇒", template.name, file=sys.stderr)

        # print(parsed)

        for k,v in pretty.items():
            print(k, "⇒", v, file=sys.stderr)

        if len(set(pretty)) == len(pretty):
            print("All replacements unique", file=sys.stderr)
        if not skip:
            text = parsed
        # if summary option is None, it takes the default i18n summary from
        # i18n subdirectory with summary_key as summary key.
        self.put_current(text, summary=self.opt.summary)

def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.
    If args is an empty list, sys.argv is used.
    :param args: command line arguments
    """
    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()
    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)
    # Parse your own command line arguments
    for arg in local_args:
        arg, sep, value = arg.partition(':')
        option = arg[1:]
        if option in ('summary', 'text'):
            if not value:
                pywikibot.input('Please enter a value for ' + arg)
            options[option] = value
        # take the remaining options as booleans.
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True
    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        # pass generator and private options to the bot
        bot = BasicBot(generator=gen, **options)
        bot.run()  # guess what it does
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
if __name__ == '__main__':
    main()