# Bikarhêner:Balyozxane/skrîpt/py/addSitelinkandLabel.py

#!/usr/bin/env python3
"""
python pwb.py updatewin -f:"addSitelinkandLabel.py" -s:"paqijî"

python pwb.py addSitelinkandLabel -lang:ku -family:wikipedia -transcludes:'Înterwîkî etîket û danasîn' -always

"""

import pywikibot
from pywikibot import pagegenerators, textlib
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ConfigParserBot,
    ExistingPageBot,
    SingleSiteBot,
)
import mwparserfromhell
from kucosmetics import CANCEL, CosmeticChangesToolkit
import urllib.parse
import requests
import mytools
from mytools import ucfirst

# When True, the script prints extra progress/diagnostic messages
# (e.g. skipped namespaces and fetched entity IDs).
VERBOSE = False
# When True, SitelinkBot uses a hard-coded list of template names instead of
# looking up the template's redirects through mytools.
TESTING = False

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816


class SitelinkBot(
    # Refer pywikibot.bot for generic bot classes
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    # CurrentPageBot,  # Sets 'current_page'. Process it in treat_page method.
    #                  # Not needed here because we have subclasses
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
    AutomaticTWSummaryBot,  # Automatically defines summary; needs summary_key
):
    """Connect pages to Wikidata using an interwiki helper template.

    For every main-namespace page that transcludes the helper template
    ("Înterwîkî etîket û danasîn" or one of the names in
    ``self.interwiki_templates``) the bot:

    1. reads the template parameters: ``1`` (language code), ``2`` (title of
       the article on that language's Wikipedia), ``e`` (label) and ``d``
       (description);
    2. looks up the Wikidata item connected to that foreign article;
    3. adds the current page as a sitelink to the item;
    4. adds the ``ku`` label and description when the item has none;
    5. removes the helper template from the page, applies cosmetic changes
       and saves.
    """

    use_redirects = False  # treats non-redirects only
    summary_key = 'basic-changing'

    update_options = {
        'async': False,
        'showdiff': False,
        'ignore': CANCEL.MATCH,
    }

    def __init__(self, *args, **kwargs):
        """Initialise the bot and collect the accepted template names."""
        super().__init__(*args, **kwargs)
        self.bot_name = "User:Balyozxane/skrîpt/py/addSitelinkandLabel.py"

        if TESTING:
            self.interwiki_templates = ['Îed', 'Înterwîkî etîket û danasîn']
        else:
            # Retrieve redirects for templates
            # NOTE(review): presumably the returned collection also contains
            # the template's own name -- confirm in mytools.
            self.interwiki_templates = mytools.get_template_redirects(self.site, "Înterwîkî etîket û danasîn")

    def _is_interwiki_template(self, template):
        """Return True if *template* is one of the accepted helper templates."""
        return ucfirst(template.name) in self.interwiki_templates

    def parse_template(self, wikicode):
        """Extract the helper template's parameters from *wikicode*.

        :param wikicode: parsed page text (mwparserfromhell Wikicode)
        :return: tuple ``(lang_code, interwiki_title, label, description)``;
            each element is None when the matching parameter was not found.
            If several helper templates are present, values from later
            templates override those from earlier ones.
        """
        lang_code = None
        interwiki_title = None
        label = None
        description = None
        for template in wikicode.filter_templates():
            if not self._is_interwiki_template(template):
                continue

            # Language code and title are only meaningful as a pair.
            if template.has(1) and template.has(2):
                lang_code = str(template.get(1).value).strip()
                interwiki_title = str(template.get(2).value).strip()
            if template.has("e"):
                label = str(template.get("e").value).strip()
            if template.has("d"):
                description = str(template.get("d").value).strip()

        return lang_code, interwiki_title, label, description

    def add_sitelink(self, item_id):
        """Add the current page as a sitelink to the Wikidata item *item_id*.

        Errors are reported on the console instead of being raised.

        :param item_id: Wikidata item ID (e.g. ``"Q42"``)
        :return: True if the sitelink was saved, False if saving failed
        """
        repo = self.site.data_repository()

        try:
            item = pywikibot.ItemPage(repo, title=item_id)
            item.setSitelink(self.current_page, summary=f"Added sitelink {self.current_page} ([[User:Balyozbot#Task1|Task 1]])")
        except pywikibot.exceptions.OtherPageSaveError as e:
            print(f"Error adding sitelink for {self.current_page.title()}: {str(e)}")
            return False
        except Exception as e:
            print(f"Error in add_sitelink function: {str(e)}")
            return False

        print(f"Sitelink added for {self.current_page.title()} to {item_id}")
        return True

    @staticmethod
    def add_description_to_wikidata(item_id, label, description):
        """Add the ``ku`` label and description to a Wikidata item.

        An existing ``ku`` label or description is never overwritten, and an
        empty/None *label* or *description* is skipped rather than sent to
        Wikidata. Errors are reported on the console instead of being raised.

        :param item_id: Wikidata item ID (e.g. ``"Q42"``)
        :param label: label text to add, or None/empty to skip
        :param description: description text to add, or None/empty to skip
        """
        site = pywikibot.Site('wikidata', 'wikidata')
        repo = site.data_repository()

        try:
            # Create ItemPage using QID
            item = pywikibot.ItemPage(repo, title=item_id)
            item_dict = item.get()

            existing_label = item_dict.get('labels', {}).get('ku', None)
            existing_description = item_dict.get('descriptions', {}).get('ku', None)

            if existing_label is not None:
                print(f"Label already exists: {existing_label}")
            elif label:
                item.editLabels({"ku": label}, summary=f"Added [ku] label: {label} ([[User:Balyozbot#Task1|Task 1]])")
                print(f"label '{label}' added to {item_id}")

            if existing_description is not None:
                print(f"Description already exists: {existing_description}")
            elif description:
                # Only reached when the template actually supplied a
                # description; otherwise there is nothing to write.
                item.editDescriptions({"ku": description},
                                      summary=f"Added [ku] description: {description} ([[User:Balyozbot#Task1|Task 1]])")
                print(f"Description '{description}' added to {item_id}")

        except pywikibot.exceptions.OtherPageSaveError as e:
            print(f"Error adding label and description to Wikidata for item {item_id}: {str(e)}")
        except Exception as e:
            print(f"Error in add_description_to_wikidata function: {str(e)}")

    def remove_template(self, wikicode):
        """Remove every helper template from *wikicode* (in place).

        :param wikicode: parsed page text (mwparserfromhell Wikicode)
        :return: the resulting wikitext as a string
        """
        for template in wikicode.filter_templates():
            if self._is_interwiki_template(template):
                wikicode.remove(template)

        return str(wikicode)

    @staticmethod
    def get_qid(lang_code, title):
        """Look up the Wikidata item connected to a Wikipedia article.

        Queries the Wikidata ``wbgetentities`` API for the sitelink
        ``<lang_code>wiki`` / *title*.

        :param lang_code: language code of the Wikipedia (e.g. ``"en"``)
        :param title: article title on that Wikipedia
        :return: the item ID (e.g. ``"Q42"``), or None when no item is found
            or the request fails
        """
        params = {
            'action': 'wbgetentities',
            # Site ids use underscores where language codes use hyphens
            # (e.g. "zh-min-nan" -> "zh_min_nanwiki").
            'sites': f"{lang_code.replace('-', '_')}wiki",
            'titles': title,
            'props': 'sitelinks',
            'format': 'json',
        }
        try:
            # The timeout keeps a stalled connection from hanging the bot.
            response = requests.get("https://www.wikidata.org/w/api.php",
                                    params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON.
            print(f"Error fetching entity for '{lang_code}:{title}': {str(e)}")
            return None

        qid = None
        if "entities" in data:
            # Get the first (and only) entity; a missing page is reported
            # as an entity without an 'id' key.
            entity = next(iter(data["entities"].values()), None)
            if entity is not None:
                qid = entity.get('id')
            if VERBOSE and qid:
                print(f"Entity ID for '{lang_code}:{title}' fetched: '{qid}'")
        elif VERBOSE:
            print(f"No entity found for '{lang_code}:{title}'")

        return qid

    def do_kozmetik(self, old_text):
        """Apply cosmetic changes to *old_text*.

        :param old_text: wikitext to clean up
        :return: tuple ``(text, summary_suffix)``. ``text`` is the cleaned
            wikitext, or *old_text* unchanged when the toolkit reports
            failure (returns False) or changes nothing. ``summary_suffix``
            is an edit-summary fragment describing the cosmetic changes,
            or ``""`` when none were applied.
        """
        cc_toolkit = CosmeticChangesToolkit(self.current_page,
                                            ignore=self.opt.ignore)
        new_text, summaries = cc_toolkit.change(old_text)

        # Never hand a False "text" back to the caller: it would be passed
        # on to put_current() as page content.
        if new_text is False or new_text == old_text:
            return old_text, ""

        kozmetik_cebu = "; paqijiyên kozmetîk"
        applied_summaries = ', '.join(summaries.values())
        if applied_summaries:
            kozmetik_cebu += f' ({applied_summaries}.)'

        return new_text, kozmetik_cebu

    def treat_page(self) -> None:
        """Process the current page: link it to Wikidata and drop the template.

        Pages outside the main namespace, pages without a usable helper
        template, targets that are disambiguation pages, targets without a
        Wikidata item and pages whose sitelink cannot be saved are skipped
        without removing the template.
        """
        page = self.current_page

        if page.namespace() != 0:
            if VERBOSE:
                print("Skipping Namespace not 0.")
            return

        text = page.text
        wikicode = mwparserfromhell.parse(text)

        lang_code, interwiki_title, label, description = self.parse_template(wikicode)
        if VERBOSE:
            print(lang_code, interwiki_title, label, description)
        if not (lang_code and interwiki_title):
            return

        try:
            lang_site = pywikibot.Site(lang_code, 'wikipedia')
        except pywikibot.exceptions.SiteDefinitionError as e:
            # A mistyped language code in the template must not abort the
            # whole bot run.
            print(f"Unknown language code '{lang_code}' on page {page.title()}: {str(e)}. Skipping.")
            return
        interwiki_page = pywikibot.Page(lang_site, interwiki_title)

        # Follow a redirect so that both the disambiguation check and the
        # Wikidata lookup use the real target page.
        if interwiki_page.isRedirectPage():
            interwiki_page = interwiki_page.getRedirectTarget()
            interwiki_title = interwiki_page.title()
        if VERBOSE:
            print(interwiki_page.title())

        if interwiki_page.isDisambig():
            # Add "Kategorî:Rûpelên bi înterwîkiyê yên diçin rûpelên cudakirinê" only if it doesn't exist in the page
            category_link = '[[Kategorî:Rûpelên bi înterwîkiyê yên diçin rûpelên cudakirinê]]'
            kat_heye = mytools.is_category_in_page(page, 'Rûpelên bi înterwîkiyê yên diçin rûpelên cudakirinê')

            if not kat_heye:
                page.text = text + "\n" + category_link
                page.save(summary=f"Bot: +{category_link}")
            pywikibot.output(f"Skipping disambiguation page: {page.title()}")
            return

        item_id = self.get_qid(lang_code, interwiki_title)

        if not item_id:
            print(f"Unable to get Wikidata ID for page: {page.title()}. Skipping.")
            return

        # Keep the template on the page when the sitelink could not be
        # saved, so the page can be retried or fixed by hand.
        if not self.add_sitelink(item_id):
            print(f"Error processing page {page.title()}: sitelink was not added. Skipping the current page.")
            return

        if not label:
            label = page.title(with_ns=True)

        self.add_description_to_wikidata(item_id, label, description)

        updated_text = self.remove_template(wikicode)
        if text == updated_text:
            return

        cleaned_new_text, kozmetik_cebu = self.do_kozmetik(updated_text)

        summary = f'[[{self.bot_name}|Bot]]: Rûpel bi Wîkîdaneyê hat girêdan û şablon hat rakirin{kozmetik_cebu}'

        self.put_current(
            cleaned_new_text,
            summary=summary,
            asynchronous=self.opt['async'],
            show_diff=self.opt['showdiff']
        )


def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Recognised script options: ``-ignore:MODE`` (MODE is the name of a
    CANCEL member, case-insensitive); every other remaining ``-option`` is
    passed to the bot as a boolean flag (e.g. ``-always``, ``-async``,
    ``-showdiff``).

    :param args: command line arguments
    :raises ValueError: if ``-ignore`` names an unknown CANCEL mode
    """
    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse your own command line arguments
    for arg in local_args:
        name, _, value = arg.partition(':')
        option = name[1:]  # drop the leading '-'
        if option == 'ignore':
            mode = value.upper()
            try:
                options['ignore'] = getattr(CANCEL, mode)
            except AttributeError as err:
                raise ValueError(f'Unknown ignore mode {mode!r}!') from err
        # take the remaining options as booleans.
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True
    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)

    # check if further help is needed
    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        # pass generator and private options to the bot
        bot = SitelinkBot(generator=gen, **options)
        bot.run()  # guess what it does


# Run the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()