# Fork of [[en:User:Qwerfjkl/VEref.py]]
# Fork of [[User:Psiĥedelisto/VisualEditor ref namer.py]]
#
# (C) Pywikibot team, 2006-2021
#
# Distributed under the terms of the MIT license.
#
import mwparserfromhell
from mwparserfromhell.wikicode import Wikicode
import string
import re
import sys
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import (
AutomaticTWSummaryBot,
ConfigParserBot,
ExistingPageBot,
NoRedirectPageBot,
SingleSiteBot,
)
# This is required for the text that is shown when you run this script
# with the parameter -help: the '&params;' placeholder in the module help
# text is replaced by the shared page generator parameter help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816
class BasicBot(
    # Refer pywikibot.bot for generic bot classes
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    # CurrentPageBot,  # Sets 'current_page'. Process it in treat_page method.
    #                  # Not needed here because we have subclasses
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
    NoRedirectPageBot,  # CurrentPageBot which only treats non-redirects
    AutomaticTWSummaryBot,  # Automatically defines summary; needs summary_key
):

    """
    Rename numeric ``<ref>`` names (``:0``, ``:1``, ...) to readable ones.

    For every named, non-self-closing ``<ref>`` whose name is a colon
    followed by digits and whose content starts with a template, a new
    name is built from the first author's surname and a four-digit year
    taken from the template (e.g. ``Smith2020``). The new name is written
    to every ``<ref>`` tag that uses the old one. When at least one
    reference was renamed, the first-letter case of template names on the
    page is normalised as well and the page is saved.

    :ivar summary_key: Edit summary message key. The message that should be
        used is placed on /i18n subdirectory. The file containing these
        messages should have the same name as the caller script (i.e. basic.py
        in this case). Use summary_key to set a default edit summary message.
    :type summary_key: str
    """

    summary_key = 'basic-changing'

    update_options = {
        'replace': False,  # delete old text and write the new text
        'summary': "Bot: Navên referansan ên [[en:WP:VE|VisualEditor]]ê hat sererastkirin",  # your own bot summary
        'text': None,  # add this text from option. 'Test' is default
        'top': False,  # append text on top of the page
    }

    # Reference names that consist of a colon followed by digits only.
    _NUMERIC_REF_NAME = re.compile(r"^:\d+$")
    # First run of four digits in a date string is taken as the year.
    _YEAR = re.compile(r"\d{4}")
    # Template parameters searched (in this order) for the author / the date.
    _AUTHOR_PARAMS = ("vauthors", "authors", "last", "first")
    _DATE_PARAMS = ("date", "year")
    # Templates whose name must start with a lowercase letter.
    _LOWERCASE_TEMPLATES = frozenset(
        ["rp", "ill", "bz", "pz", "lang", "ziman", "respell", "abbr"])
    _LOWERCASE_PREFIXES = ("lang-", "ziman-", "bi-")

    @staticmethod
    def _named_ref_tags(parsed):
        """Return all ``<ref>`` tags in *parsed* that carry a ``name``.

        The tag name itself is compared instead of searching the whole node
        text, so ``<references>`` and tags that merely contain a ``<ref>``
        are not picked up.
        """
        return [
            tag for tag in parsed.ifilter_tags(recursive=True)
            if str(tag.tag).strip().lower() == "ref" and tag.has("name")
        ]

    @classmethod
    def _surname(cls, template) -> str:
        """Return the first author's surname from *template*, or ``''``.

        The first parameter present among ``_AUTHOR_PARAMS`` is used. The
        text before the first comma (or, without a comma, before the first
        space) is taken and stripped of ASCII punctuation. An empty string
        is returned when no author parameter exists or when the result
        contains no ASCII letter.
        """
        for param in cls._AUTHOR_PARAMS:
            if template.has(param):
                raw = str(template.get(param).value).strip()
                break
        else:
            return ''

        if "," in raw:
            surname = raw[:raw.index(",")]
        elif " " in raw:
            surname = raw[:raw.index(" ")]
        else:
            surname = raw
        surname = ''.join(
            char for char in surname if char not in string.punctuation)

        # Names without any ASCII letter (digits only, other scripts, empty)
        # are not used for a reference name.
        if not any(char in string.ascii_letters for char in surname):
            return ''
        return surname

    @classmethod
    def _year_of(cls, template) -> str:
        """Return the first four-digit year found in *template*, or ``''``.

        ``date`` is consulted before ``year``. A parameter without four
        consecutive digits is ignored instead of raising an error.
        """
        for param in cls._DATE_PARAMS:
            if template.has(param):
                match = cls._YEAR.search(str(template.get(param).value))
                if match:
                    return match[0]
        return ''

    @classmethod
    def _normalise_template_names(cls, parsed) -> None:
        """Normalise the first-letter case of every template name in place.

        Names listed in ``_LOWERCASE_TEMPLATES`` or starting with one of
        ``_LOWERCASE_PREFIXES`` get a lowercase first letter, all others an
        uppercase one. Whitespace before and after the name is preserved.
        Each changed name is reported on stderr.
        """
        for template in parsed.filter_templates():
            raw = str(template.name)
            core = raw.strip()
            if not core:
                continue
            lowered = core.lower()
            if (lowered in cls._LOWERCASE_TEMPLATES
                    or lowered.startswith(cls._LOWERCASE_PREFIXES)):
                first = core[0].lower()
            else:
                first = core[0].upper()
            if first == core[0]:
                continue
            start = raw.find(core)
            end = start + len(core)
            template.name = raw[:start] + first + core[1:] + raw[end:]
            print(core, "⇒", first + core[1:], file=sys.stderr)

    def treat_page(self) -> None:
        """Load the given page, do some changes, and save it."""
        text = self.current_page.text
        parsed = mwparserfromhell.parse(text)
        named_refs = self._named_ref_tags(parsed)

        # Names already in use; a generated name must not collide with them,
        # otherwise two different references would be merged into one.
        taken = {str(tag.get("name").value) for tag in named_refs}

        # Taken from [[User:Psiĥedelisto/VisualEditor ref namer.py]]
        renames = {}
        for ref in named_refs:
            old_name = str(ref.get("name").value)
            if old_name in renames:
                continue  # the first definition of a name wins
            if not self._NUMERIC_REF_NAME.search(old_name):
                continue
            # Self-closing tags only reuse a reference and have no content.
            if ref.self_closing or ref.contents is None:
                continue
            nodes = ref.contents.nodes
            if not nodes or not isinstance(
                    nodes[0], mwparserfromhell.nodes.Template):
                continue
            template = nodes[0]

            surname = self._surname(template)
            year = self._year_of(template)
            if not surname or not year:
                continue

            base = "{}{}".format(surname, year)
            candidate = base
            counter = 2
            while candidate in taken:
                # Disambiguate, e.g. Smith2020, Smith2020-2, Smith2020-3 ...
                candidate = "{}-{}".format(base, counter)
                counter += 1
            taken.add(candidate)
            renames[old_name] = candidate

        changed = False
        for tag in named_refs:
            old_name = str(tag.get("name").value)
            if old_name in renames:
                # Address the attribute by name: it is not necessarily the
                # first one (e.g. <ref group="n" name=":0">).
                tag.get("name").value = renames[old_name]
                changed = True

        for old_name, new_name in renames.items():
            print(old_name, "⇒", new_name, file=sys.stderr)

        if changed:
            # Cosmetic template changes are only saved together with an
            # actual reference rename.
            self._normalise_template_names(parsed)
            text = str(parsed)

        # if summary option is None, it takes the default i18n summary from
        # i18n subdirectory with summary_key as summary key.
        self.put_current(text, summary=self.opt.summary)
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse your own command line arguments
    for arg in local_args:
        arg, _, value = arg.partition(':')
        option = arg[1:]
        if option in ('summary', 'text'):
            if not value:
                # Keep the answer: without the assignment the prompted
                # value would be thrown away and the option stay empty.
                value = pywikibot.input('Please enter a value for ' + arg)
            options[option] = value
        # take the remaining options as booleans.
        # You will get a hint if they aren't pre-defined in your bot class
        else:
            options[option] = True

    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)
    if gen:
        # pass generator and private options to the bot
        bot = BasicBot(generator=gen, **options)
        bot.run()  # guess what it does
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
# Start the bot only when this file is executed as a script; importing the
# module must not trigger a run.
if __name__ == '__main__':
    main()