Bikarhêner:Balyozxane/skrîpt/py/citeKurdifier.py
(Ji Bikarhêner:Balyozxane/skrîpt/py/citekurdifier.py hat beralîkirin)
"""
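Pywikibot script that converts citation templates into their Kurdish form.

For every citation template listed in the on-wiki JSON configuration page,
the bot renames the template and its parameters, converts month names in
date values and a few other parameter values, and then saves the page.

Command recorded by the author (presumably used to update the on-wiki copy
of this script):

    python pwb.py updatewin -f:"citekurdifier.py" -s:"first_upper ji bo ziman"

To-do: (nothing recorded)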
"""
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import SingleSiteBot, ConfigParserBot, AutomaticTWSummaryBot
from kucosmetics import CANCEL, CosmeticChangesToolkit
from pywikibot.tools import first_upper
import json
import re
import mwparserfromhell
class CiteKurdifierBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Rewrite citation templates on article pages into their Kurdish form.

    The conversion tables (template names, parameter names, month names,
    URL-status values and language names) are read from a JSON page on the
    wiki when the bot is constructed.  For every page the bot renames the
    known citation templates and their parameters, converts date values and
    a few other parameter values, runs the cosmetic-changes toolkit on the
    result and saves the page.
    """

    # Pywikibot bot setting; False is intended to leave redirect pages alone.
    use_redirects = False

    update_options = {
        'async': False,
        'showdiff': False,
        'ignore': CANCEL.MATCH,
    }

    # "Day Month Year", e.g. "5 May 2020".
    _DAY_MONTH_YEAR = re.compile(
        r"(?P<day>\d+)\s+(?P<month>\w+)\s+(?P<year>\d+)")
    # "Month Day, Year", e.g. "May 5, 2020".
    _MONTH_DAY_YEAR = re.compile(
        r"(?P<month>\w+)\s+(?P<day>\d+),\s+(?P<year>\d+)")
    # Edition written as "2 ed", "2 ed." or "2nd"/"3rd"/"1st"/"4th".
    _EDITION = re.compile(r"^(\d+)(?: ed\.?|th|rd|st|nd)$")

    def __init__(self, **kwargs):
        """Initialise the bot and load the conversion tables from the wiki.

        If the JSON page cannot be decoded (or does not hold a JSON object)
        an error is logged and every table is left empty, so the bot runs
        without touching any template instead of failing later with an
        ``AttributeError``.

        :param kwargs: options forwarded to the pywikibot bot base classes
        """
        super().__init__(**kwargs)
        self.bot_name = "Bikarhêner:Balyozxane/skrîpt/py/citeKurdifier.py"

        # Fetch JSON content from the configuration page.
        json_title = "Bikarhêner:Balyozxane/skrîpt/json/citeKurdifier.json"
        json_page = pywikibot.Page(self.site, json_title)
        self.json_content = json_page.text

        try:
            conversions = json.loads(self.json_content)
        except json.JSONDecodeError as e:
            pywikibot.error(f"Error decoding JSON content: {e}")
            conversions = {}
        if not isinstance(conversions, dict):
            pywikibot.error(
                f"JSON content of {json_title} is not an object")
            conversions = {}

        # Always define every table, even after a decoding error, because
        # the other methods read them unconditionally.
        self.parameter_conversions = conversions
        self.cite_templates = conversions.get('templates', {})
        self.basic_params = conversions.get('basic_params', {})
        self.numbered_params = conversions.get('numbered_params', {})
        self.months = conversions.get('month_list', {})
        self.date_params = conversions.get('date_params', {})
        self.url_status = conversions.get('rewşa_urlyê', {})
        self.ziman = conversions.get('ziman', {})

    def do_kozmetik(self, old_text):
        """Run the cosmetic-changes toolkit over *old_text*.

        :param old_text: page text to clean up
        :return: tuple ``(text, summary_suffix)``; the suffix is empty when
            the toolkit changed nothing, otherwise it names the cosmetic
            clean-up (and the individual summaries, if any) for the edit
            summary
        """
        cc_toolkit = CosmeticChangesToolkit(self.current_page,
                                            ignore=self.opt.ignore)
        new_text, summaries = cc_toolkit.change(old_text)

        if new_text is False:
            # The toolkit handed back False instead of text (presumably the
            # cosmetic changes were cancelled); keep the text we were given
            # so the caller never saves False as page content.
            return old_text, ""
        if new_text == old_text:
            return new_text, ""

        kozmetik_cebu = "; paqijiyên kozmetîk"
        applied_summaries = ', '.join(summaries.values()) if summaries else ''
        if applied_summaries:
            kozmetik_cebu += f' ({applied_summaries}.)'
        return new_text, kozmetik_cebu

    def treat_page(self) -> None:
        """Convert the citation templates of the current page and save it.

        Pages that do not exist are skipped, as are pages outside namespace
        0 (except the author's test page).  The page is saved only when the
        conversion changed something other than whitespace.
        """
        page = self.current_page

        if not page.exists():
            pywikibot.error(f"Skipping {page.title()} since it does not exist")
            return

        if page.namespace() != 0 and page.title() != 'Bikarhêner:Balyozxane/ceribandin':
            pywikibot.output(f"Skipping page '{page.title()}' because it is not in namespace 0")
            return

        text = page.text
        wikicode = mwparserfromhell.parse(text)

        # Only templates known from the configuration are rewritten.
        for template in wikicode.filter_templates():
            if not self.should_process_template(template):
                continue
            self.replace_parameters(template.params)
            self.replace_date_values(template.params)
            self.replace_param_values(template.params)
            self.remove_params(template)
            self.replace_template_name(template)

        new_text = str(wikicode)
        # Compare with all whitespace removed: the parameter rewriting
        # normalises spacing, which alone is not worth an edit.
        if ''.join(text.split()) == ''.join(new_text.split()):
            return

        cleaned_new_text, kozmetik_cebu = self.do_kozmetik(new_text)
        summary = f'[[{self.bot_name}|Bot]]: Kurdîkirina çavkaniyan{kozmetik_cebu}'
        self.put_current(
            cleaned_new_text,
            summary=summary,
            asynchronous=self.opt['async'],
            show_diff=self.opt['showdiff']
        )

    def should_process_template(self, template):
        """Return True if *template* is a known citation template.

        The template name is compared case-insensitively against both the
        keys and the values of ``self.cite_templates``.
        """
        template_name_lower = template.name.strip().lower()
        return any(
            key.lower() == template_name_lower
            or value.lower() == template_name_lower
            for key, value in self.cite_templates.items()
        )

    def replace_template_name(self, template):
        """Rename *template* to the name mapped in ``self.cite_templates``.

        Keys are matched case-insensitively, in line with
        ``should_process_template``.  Templates whose name is not a key
        (for example ones that already carry the target name) are left
        unchanged.
        """
        template_name = template.name.strip().lower()
        templates = self.cite_templates

        if template_name in templates:
            new_template_name = templates[template_name]
        else:
            # Fall back to a case-insensitive scan so that a key written
            # with capitals in the JSON page is still honoured.
            for key, value in templates.items():
                if key.lower() == template_name:
                    new_template_name = value
                    break
            else:
                return

        # Trailing space separates the name from the first "|".
        template.name = f'{new_template_name} '

    def remove_params(self, template):
        """Drop the ``df`` parameter from *template* if it is present."""
        if template.has("df"):
            template.remove("df")

    def replace_parameters(self, params):
        """Rename parameters and normalise the spacing of their values.

        Names found (lower-cased) in ``self.basic_params`` are replaced
        directly.  Each pattern of ``self.numbered_params`` that matches the
        original name is then applied with ``re.sub``; such a match takes
        precedence over the basic replacement.  Every value is stripped and
        given exactly one trailing space.
        """
        for param in params:
            param_name = param.name.strip()
            lowered_name = param_name.lower()

            # Basic replacements for parameter names.
            if lowered_name in self.basic_params:
                param.name = self.basic_params[lowered_name]

            # Numbered arguments replacements using regex.
            for pattern, new_param_name in self.numbered_params.items():
                if re.match(pattern, param_name):
                    param.name = re.sub(pattern, new_param_name, param_name)

            # Add a single space after the parameter value.
            param.value = f'{param.value.strip()} '

    def replace_date_values(self, params):
        """Convert dates in date parameters to "day month year" in Kurdish.

        Applies to parameters whose lower-cased name is in
        ``self.date_params``.  The first "Day Month Year" date in the value
        is preferred, otherwise the first "Month Day, Year" date.  The month
        name is looked up (lower-cased) in ``self.months`` and kept as is
        when unknown.  Only the matched date is replaced; any text around it
        (for instance the first day of a range such as "5–6 May 2020") is
        preserved.
        """
        for param in params:
            if param.name.strip().lower() not in self.date_params:
                continue

            value = str(param.value).strip()
            match = (self._DAY_MONTH_YEAR.search(value)
                     or self._MONTH_DAY_YEAR.search(value))
            if not match:
                continue

            month = match.group("month")
            kurdish_month = self.months.get(month.lower(), month)
            converted = (
                f"{match.group('day')} {kurdish_month} {match.group('year')}"
            )
            # Splice the converted date into the value instead of
            # overwriting the whole value with it.
            new_value = value[:match.start()] + converted + value[match.end():]
            param.value = new_value.strip() + " "

    def replace_param_values(self, params):
        """Convert the values of a few specific (Kurdish-named) parameters.

        * ``çap``: "2 ed.", "2 ed" or "2nd" style values become the bare
          number.
        * ``rewşa-urlyê``: value replaced through ``self.url_status``.
        * ``ziman``: value replaced through ``self.ziman`` (looked up with
          its first letter upper-cased).
        * ``sernav``: the placeholder titles "archived copy" and
          "arşivlenmiş kopya" become "Kopîkirina arşîvê".

        Values without a mapping are kept; converted values get a single
        trailing space.
        """
        for param in params:
            param_name = param.name.strip().lower()
            param_val = str(param.value).strip()

            if param_name == "çap":
                match = self._EDITION.search(param_val)
                if match:
                    param.value = match.group(1) + " "

            elif param_name == "rewşa-urlyê":
                if param_val:
                    # Unknown statuses keep their original value.
                    new_value = self.url_status.get(param_val, param_val)
                    param.value = new_value.strip() + " "

            elif param_name == "ziman":
                if param_val:
                    # Unknown languages keep their original value.
                    new_value = self.ziman.get(first_upper(param_val),
                                               param_val)
                    param.value = new_value.strip() + " "

            elif param_name == "sernav":
                if param_val.lower() in ["archived copy", "arşivlenmiş kopya"]:
                    param.value = "Kopîkirina arşîvê "
def main(*args: str) -> None:
    """Parse the command line and run the citation bot.

    The arguments first pass through ``pywikibot.handle_args`` and the page
    generator factory.  Of what is left, ``-always``, ``-async`` and
    ``-showdiff`` become boolean bot options and ``-ignore:<mode>`` selects
    the ``CANCEL`` member of that name; anything else is ignored.

    :param args: command-line arguments
    :raises ValueError: if ``-ignore`` names a mode ``CANCEL`` does not have
    """
    remaining = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    remaining = factory.handle_args(remaining)

    switches = ('-always', '-async', '-showdiff')
    bot_options = {}
    for argument in remaining:
        flag, _, mode = argument.partition(':')
        if flag in switches:
            # Option key is the flag without its leading dash.
            bot_options[flag[1:]] = True
            continue
        if flag != '-ignore':
            continue
        mode = mode.upper()
        try:
            bot_options['ignore'] = getattr(CANCEL, mode)
        except AttributeError:
            raise ValueError(f'Unknown ignore mode {mode!r}!')

    pages = factory.getCombinedGenerator(preload=True)
    # suggest_help reports the missing generator and returns a true value
    # in that case, so there is nothing to run.
    if pywikibot.bot.suggest_help(missing_generator=not pages):
        return
    CiteKurdifierBot(generator=pages, **bot_options).run()
if __name__ == "__main__":
    # Run the bot only when this file is executed as a script.
    main()