#!/usr/bin/python
# -*- coding: utf-8 -*-
from xml.dom import minidom
import os
import sys
import urllib
import re, time, datetime
import wikipedia, config, catlib
import xml.parsers.expat
# Site handle passed to the catlib.Category lookups further down.
# NOTE(review): presumably the default wiki from the bot's user config - confirm.
site = wikipedia.getSite()
# Shared scratch list: timestampattrs() appends one datetime per parsed <rev>
# element, and the main loop reads tlist[0] and resets the list per article.
tlist=[]
def GetPage(title):
    """Fetch the API XML holding the latest revision timestamp of a page.

    Sends ``action=query&prop=revisions`` with ``rvlimit=1`` and
    ``rvprop=timestamp`` to the zh.wikipedia.org API.

    Args:
        title: object exposing ``urlname()``; the result is inserted into
            the query string as-is (presumably already URL-encoded -
            TODO confirm against the pywikipedia Page API).

    Returns:
        The raw response body as returned by the opener's ``read()``.
    """
    title = title.urlname()
    baseurl = u'http://zh.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=1&rvprop=timestamp&format=xml'
    url = baseurl % title
    # Log before opening so a failing request still shows which URL was used.
    wikipedia.output(u'URL:\n%s' % url)
    uo = wikipedia.MyURLopener()
    u = uo.open(url)
    try:
        return u.read()
    finally:
        # Release the connection even when read() raises.
        u.close()
def timestampattrs(name, attrs):
    """Expat start-element handler that records revision timestamps.

    For each ``rev`` element that carries a ``timestamp`` attribute, the
    value is parsed with the format ``%Y-%m-%dT%H:%M:%SZ`` and appended to
    the module-level ``tlist`` as a naive ``datetime.datetime``.  Other
    elements, and ``rev`` elements without the attribute, are ignored.

    Args:
        name: element name reported by the parser.
        attrs: mapping of attribute names to values for that element.

    Raises:
        ValueError: if the timestamp does not match the expected format.
    """
    if name != 'rev':
        return
    try:
        timestamp = attrs[u'timestamp']
    except KeyError:
        # A <rev> without a timestamp carries nothing to record.
        return
    # Parse straight into a datetime instead of rebuilding it field by field.
    tlist.append(datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ'))
def Parsexml(html, start_element):
    """Parse an XML document with expat, reporting start tags to a callback.

    Args:
        html: the complete XML document text to parse.
        start_element: callable ``(name, attrs)`` installed as the parser's
            ``StartElementHandler``.

    Raises:
        xml.parsers.expat.ExpatError: if the document is malformed or
            incomplete.
    """
    p = xml.parsers.expat.ParserCreate()
    p.StartElementHandler = start_element
    # Only set the flag on parsers that expose it; Python 3 parsers have no
    # such attribute and always hand text strings to the handlers.
    if hasattr(p, 'returns_unicode'):
        p.returns_unicode = True
    # The whole document arrives in one call, so mark it final; otherwise
    # expat never reports a truncated document.
    p.Parse(html, True)
# Source category (its name translates to "articles being translated"),
# walked recursively.
category=u'Category:正在翻譯的條目'
cat=catlib.Category(site, category)
listOfArticles = cat.articlesList(recurse=True)
# Target category (its name translates to "expired translation articles");
# members are only reported, never tagged again.
testcategory=u'Category:过期的翻译条目'
testcat=catlib.Category(site, testcategory)
testlistOfArticles = testcat.articlesList()
# An article counts as stale when its last revision is older than this.
max_age = datetime.timedelta(days=90)
for article in listOfArticles:
    # Start from an empty list so timestamps never leak between articles.
    del tlist[:]
    html = GetPage(article)
    Parsexml(html, timestampattrs)
    if not tlist:
        # No <rev> element came back; there is no timestamp to judge by.
        wikipedia.output(u'[[%s]] has no revision timestamp, skipped' % article.title())
        continue
    last_edit = tlist[0]
    if datetime.datetime.utcnow() - last_edit <= max_age:
        continue
    if article in testlistOfArticles:
        # The format string has two placeholders: page title and category.
        wikipedia.output(u'[[%s]] have in %s, wait Cleanup! ' % (article.title(), testcategory))
        continue
    stamp = last_edit.isoformat()
    text = article.get()
    tt = u'{{过期翻译|time=%s}}\n' % stamp
    # Prepend the template so it sits at the very top of the page.
    article.put(tt + text, u'Bot更新:超过3个月未有翻译,标记清理')
    wikipedia.output(u'[[%s]] have 90 days No Translating: last edit time at %s' % (article.title(), stamp))