#!/usr/bin/python3
import pywikibot
import pymysql
import re
import os
import time
def getList(lang_code, query_name, file_name, query_params=None):
    """Run a SQL query on a wiki replica database and return page titles.

    Args:
        lang_code: Language code of the wiki (e.g. "ku"). It is used to build
            both the replica host name and the database name.
        query_name: The SQL text to execute (despite the name, this is the
            query itself, not an identifier).
        file_name: Not used by this function; kept so existing callers that
            pass it keep working.
        query_params: Optional parameters handed to ``cursor.execute`` together
            with the query.

    Returns:
        A list of strings taken from the first column of every result row,
        decoded from UTF-8 when the driver returns bytes, with underscores
        replaced by spaces.
    """
    # Database connection details.
    # Note: on Toolforge the user name and password come from the "my.cnf"
    # file below; outside Toolforge, pass ``user=`` / ``password=`` to
    # ``pymysql.connect`` instead.
    db_hostname = lang_code + "wiki.analytics.db.svc.wikimedia.cloud"
    db_port = 3306
    db_name = lang_code + "wiki_p"
    # The [client] section of this file supplies the credentials.
    db_connect_file = "~/replica.my.cnf"

    connection = pymysql.connect(
        host=db_hostname,
        port=db_port,
        database=db_name,
        read_default_file=db_connect_file,
        charset='utf8',
    )
    # try/finally guarantees the connection is released even when the query
    # fails; the cursor is closed by its own context manager.
    try:
        with connection.cursor() as cursor:
            # Select the target database explicitly.
            cursor.execute("USE " + db_name + ";")
            if query_params is not None:
                cursor.execute(query_name, query_params)
            else:
                cursor.execute(query_name)
            results = cursor.fetchall()
    finally:
        connection.close()

    # Extract page titles from the first (assumed only) column of each row.
    page_titles = []
    for row in results:
        title = row[0]
        # Depending on the column type the driver may hand back bytes or str;
        # only bytes need decoding.
        if isinstance(title, (bytes, bytearray)):
            title = title.decode('utf-8')
        page_titles.append(title.replace('_', ' '))
    return page_titles
def treat_page(page_title):
    """Prepend a header or deletion template to a page on ku.wikipedia.

    The page is skipped (with a printed message) when it does not exist, when
    its counterpart returned by ``toggleTalkPage()`` does not exist, when it
    is a redirect, or when its text already contains a talk-header or
    deletion template.

    Otherwise, if the text is empty apart from whitespace, dots and commas,
    ``{{jêbirin|Rûpela vala}}`` is prepended; if it has other content,
    ``{{Serê gotûbêjê}}`` is prepended. The page is then saved.

    Args:
        page_title: Full title (including namespace prefix) of the page to
            process.

    Returns:
        None. Save errors are caught and reported on stdout.
    """
    site = pywikibot.Site("ku", "wikipedia")
    page = pywikibot.Page(site, page_title)

    # Check if the page exists.
    if not page.exists():
        print(f"Page {page.title()} does not exist. Skipping...")
        return

    # The other member of the article/talk pair must exist as well.
    sereke = page.toggleTalkPage()
    if not sereke.exists():
        # title is a method: it must be called, otherwise the message shows
        # the bound-method repr instead of the page title.
        print(f"rûpela {sereke.title()} does not exist. Skipping...")
        return

    if page.isRedirectPage():
        print(f"Skipping page {page.title()} - it's a redirect.")
        return

    text = page.text

    # Both capitalisations of the first letter are checked because the
    # substring test is case-sensitive.
    header_markers = (
        "{{Serê gotûbêjê",
        "{{serê gotûbêjê",
        "{{Talk header",
        "{{talk header",
    )
    if any(marker in text for marker in header_markers):
        print("Skipping page - {{Serê gotûbêjê is already in the text.")
        return

    deletion_markers = ("{{jêbirin", "{{Jêbirin")
    if any(marker in text for marker in deletion_markers):
        print("Skipping page - {{Jêbirin is in the text.")
        return

    # Remove whitespace characters, dots, and commas from the page text so
    # that pages holding only such filler count as empty.
    cleaned_text = re.sub(r'[\s.,]', '', text)

    if not cleaned_text:
        # Empty page: tag it for deletion.
        template_text = "{{jêbirin|Rûpela vala}}"
        summary = f"Bot: +{template_text}"
    else:
        # Page has content: add the talk header template.
        template_text = "{{Serê gotûbêjê}}"
        summary = "Bot: {{[[Şablon:Serê gotûbêjê|Serê gotûbêjê]]}} lê hat zêdekirin"

    # Add the template at the top of the page text.
    page.text = f"{template_text}\n\n{text}"

    try:
        page.save(summary=summary)
    except pywikibot.exceptions.LockedPageError:
        print(f'Skip locked page {page.title(as_link=True)}')
    except Exception as e:
        # Best-effort bot run: report the failure and carry on with the
        # next page rather than aborting.
        print(f"Error editing page {page.title()}: {str(e)}")
# Replica query: full titles of non-redirect pages in namespaces
# 1, 5, 11, 15 and 829 that have no template link to 'Serê_gotûbêjê' or
# 'Talk_header' (lt_namespace = 10), leaving out titles that contain
# 'Arşîv' or '/Jêbirin'.
ku_query = """
SELECT
CASE
WHEN page_namespace = 1 THEN CONCAT('Gotûbêj:', page_title)
WHEN page_namespace = 5 THEN CONCAT('Gotûbêja Wîkîpediyayê:', page_title)
WHEN page_namespace = 11 THEN CONCAT('Gotûbêja şablonê:', page_title)
WHEN page_namespace = 15 THEN CONCAT('Gotûbêja kategoriyê:', page_title)
WHEN page_namespace = 829 THEN CONCAT('Gotûbêja modulê:', page_title)
ELSE page_title
END AS full_page_title
FROM page
WHERE page_is_redirect = 0
AND page_namespace IN (1,5,11,15,829)
AND page_id NOT IN (
SELECT tl_from FROM templatelinks JOIN linktarget ON tl_target_id = lt_id
WHERE lt_namespace = 10
AND lt_title IN (
'Serê_gotûbêjê',
'Talk_header'
)
)
AND page_title NOT LIKE '%Arşîv%'
AND page_title NOT LIKE '%/Jêbirin%';
"""
ku_file_name = "testtext.txt"
ku_page_titles = getList("ku", ku_query, ku_file_name)

if ku_page_titles:
    # Stays True unless treat_page raises for at least one title.
    all_pages_processed = True
    for talk_title in ku_page_titles:
        # This one page is deliberately left alone.
        if talk_title == "Gotûbêj:WP:VS":
            print("Skipping page 'Gotûbêj:WP:VS'")
            continue
        try:
            treat_page(talk_title)
        except Exception as err:
            print(f"Error processing gotubej '{talk_title}': {str(err)}")
            all_pages_processed = False

    # Report the overall outcome together with the current time.
    outcome = (
        "All pages processed successfully at %s!"
        if all_pages_processed
        else "Some pages failed to process at %s!"
    )
    print(outcome % (time.ctime()))
else:
    print("No results found from the query. %s!" % (time.ctime()))