/* 用户:Antigng-bot/redirect */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <process.h>
#include <windows.h>
#include <time.h>
#include "network.h"
#include "convert.h"
#include "auth.h"
#include "zhconverter.h"
/*
 * One pending work item; a node of the singly linked list whose head is
 * the global `pbl`.
 *
 * query() allocates the node and fills `title` and `id` with heap copies
 * of the "title" and "fromid" attributes of an <r> element.
 * threadfunc() pops nodes off the list and frees both strings and the
 * node once precheck() has run on it.
 */
struct problemlist
{
	char *title;
	char *id;
	struct problemlist *next;
};
/*
 * Arguments describing one edit request, consumed by smartedit().
 */
struct neditargv
{
	/* Not assigned by pagecheck() and not read by smartedit() in this file. */
	char *title;
	/* Request body buffer; pagecheck() starts it with "&text=" followed by
	 * the URL-encoded page text. */
	HTTP newtext;
	/* Page id string, sent as the pageid= URL parameter. */
	char *id;
	/* Revision timestamp string, sent as the basetimestamp= URL parameter. */
	char *time;
};
/* Per-thread index slots; threadini() stores i in threadc[i] and passes
 * its address to the new thread. */
int threadc[1024];
/* NOTE(review): threads and threadpool are not referenced in the code
 * visible here - confirm whether they are still needed. */
char *threads[1024];
HANDLE threadpool[1024];
/* cs is held by smartedit() while it reads `token` / writes `hastoken`. */
CRITICAL_SECTION cs;
/* tcs is held while threadnumber is decremented (threadfunc) or polled (main). */
CRITICAL_SECTION tcs;
/* hcs is held while the work list `pbl` is popped (threadfunc) or polled (main). */
CRITICAL_SECTION hcs;
/* Number of worker threads counted as started by threadini() and not yet
 * finished. */
int threadnumber=0;
/* Start flag: worker threads sleep until this becomes non-zero. */
int action=0;
/* NOTE(review): hl is not referenced in the code visible here. */
struct hashlist *hl=NULL;
/* Head of the pending work list built by query(). */
struct problemlist *pbl=NULL;
/* Result of G2U("{{简繁重定向}}"), set in main(); pagecheck() appends it
 * (URL-encoded) to the page text. */
char *rdtemp_u;
/* Command-line settings filled in by parsearg(): -u, -p, -n and -F. */
char *username=NULL;
char *passwd=NULL;
char *ns=NULL;
char *convertion_table=NULL;
/* Worker thread count (-T); parsearg() resets it to 256 when the value is
 * outside 1..1024. */
int maxthread=256;
/* Reset to 0 by parsearg(); not otherwise used in the code visible here. */
int doallpage=0;
/*
 * Read the command line into the global settings.
 *
 * An option is only recognised when it starts with '-' and is followed by
 * a value that does not itself start with '-':
 *   -u username            -> username
 *   -p password            -> passwd
 *   -n namespace           -> ns (defaults to "0" when absent)
 *   -T thread count        -> maxthread (reset to 256 when outside 1..1024)
 *   -F conversion table    -> convertion_table
 *
 * Returns 0 when -u, -p and -F were all supplied, -1 otherwise.
 */
static int parsearg(int argc,char *argv[])
{
	int got_user=0,got_pass=0,got_table=0;
	int idx;
	doallpage=0;
	for(idx=1;idx<argc;idx++)
	{
		char *opt=argv[idx];
		char *val=argv[idx+1];
		if(opt[0]!='-') continue;
		/* argv[argc] is NULL, so a trailing option has no value. */
		if(!val||val[0]=='-') continue;
		switch(opt[1])
		{
		case 'u':
			username=val;
			got_user=1;
			break;
		case 'p':
			passwd=val;
			got_pass=1;
			break;
		case 'n':
			ns=val;
			break;
		case 'T':
			maxthread=atoi(val);
			if(maxthread<1||maxthread>1024) maxthread=256;
			break;
		case 'F':
			convertion_table=val;
			got_table=1;
			break;
		default:
			/* Unknown letter: leave its value to the next pass. */
			continue;
		}
		/* A recognised option consumes its value. */
		idx++;
	}
	if(!ns) ns="0";
	return (got_user&&got_pass&&got_table)?0:-1;
}
/*
 * Post the prepared body in p->newtext as a minor bot edit of page p->id.
 *
 * p->time is sent as basetimestamp and `reason` as the URL-encoded edit
 * summary. The shared edit token is appended to the request tail while
 * holding `cs`. When the API answers with an <error> whose code is
 * "notoken" or "badtoken", the shared token is flagged stale (hastoken=0,
 * unless it has already been replaced) and the request is retried, for
 * at most 3 attempts in total.
 *
 * Returns:
 *    0  the response contained no <error> element
 *   -1  smartpost() returned 0
 *   -2  the status line did not contain " 200"
 *   -3  the response contained an <error> element
 *   -4  sprintf() failed while building the request tail
 */
static int smartedit(struct neditargv *p,char *reason)
{
	HTTP res;
	char line[2048],url[4096]={0};
	char reason_e[512];
	char aft[1024],statusline[128]={0};
	char err_type[128]={0};
	char *erm[]={"code"};
	char *erv[1];
	int find;
	int has_err=0,token_err=0;
	int retry=0;
	/* xmlparsearg() stores the "code" attribute through erv[0]. */
	erv[0]=err_type;
	URLEncode(reason,strlen(reason),reason_e,510);
	sprintf(url,"https://zh.wikipedia.org/w/api.php?action=edit&pageid=%s&basetimestamp=%s",p->id,p->time);
	find=sprintf(aft,"&summary=%s&bot=1&minor=1&nocreate=1&format=xml&token=",reason_e);
	if(find<0) return -4;
	do
	{
		res=hopen();
		retry++;
		while(!hastoken)
		{
			Sleep(100);
		}
		EnterCriticalSection(&cs);
		/* Cut off the token used by a previous attempt before appending. */
		aft[find]=0;
		strcat(aft,token);
		LeaveCriticalSection(&cs);
		hrewind(p->newtext);
		/* NOTE(review): smartpost()'s return convention is not visible
		 * here; the original `!smartpost(...)` failure test is kept -
		 * confirm against its declaration. */
		if(!smartpost(url,p->newtext,aft,1,res))
		{
			hclose(res);
			return -1;
		}
		statusline[0]=0;
		hgets(statusline,127,res);
		if(!strstr(statusline," 200"))
		{
			hclose(res);
			return -2;
		}
		skipresponseheader(res);
		token_err=has_err=0;
		while(!heof(res))
		{
			if(xmlparsetag(res,line)==XML_HAS_VALUE)
			{
				if(!strcmp(line,"error"))
				{
					has_err=1;
					err_type[0]=0;
					xmlparsearg(res,1,erm,erv);
					if((!strcmp(err_type,"notoken"))||(!strcmp(err_type,"badtoken")))
					{
						token_err=1;
					}
					break;
				}
			}
		}
		hclose(res);
		if(token_err)
		{
			EnterCriticalSection(&cs);
			/* Only invalidate the shared token if it is still the one
			 * this attempt used; otherwise it was already refreshed. */
			if(!strcmp(aft+find,token)) hastoken=0;
			LeaveCriticalSection(&cs);
		}
	}while(token_err==1&&retry<3);
	if(has_err) return -3;
	else return 0;
}
/*
 * Stream the text of one revision out of `f`, and when it qualifies,
 * append the redirect template and submit the edit.
 *
 * Every character pulled from `f` is URL-encoded into a fresh body buffer
 * that starts with "&text=". While streaming, the function tracks:
 *   - whether a '#' has been seen, and after it a "[[" pair;
 *   - whether a "{{" pair occurs, which stops the scan immediately.
 * Only text with '#' followed later by "[[" and no "{{" is edited: the
 * URL-encoded `rdtemp_u` is appended and smartedit() is called with
 * `pageid` and `basetime`.
 *
 * Returns 1 when the page was skipped, 0 when an edit was attempted (the
 * result of smartedit() is not propagated).
 */
static int pagecheck(char *pageid,char *basetime,HTTP f)
{
	enum { WANT_HASH=0, WANT_LINK=1, IS_REDIRECT=2 };
	HTTP body=hopen();
	char cur=0,prev=0;
	char tmpl_enc[100];
	int has_template=0;
	int stage=WANT_HASH;
	struct neditargv req;
	hputs("&text=",6,body);
	while(xmlpulltext(f,&cur)==XML_TEXT_CONTINUE)
	{
		smartURLEncode(cur,body);
		if(prev=='{'&&cur=='{')
		{
			/* "{{" found: stop reading. */
			has_template=1;
			break;
		}
		if(cur=='#')
		{
			if(stage==WANT_HASH) stage=WANT_LINK;
		}
		else if(cur=='['&&prev=='['&&stage==WANT_LINK)
		{
			stage=IS_REDIRECT;
		}
		prev=cur;
	}
	if(has_template||stage!=IS_REDIRECT)
	{
		hclose(body);
		return 1;
	}
	URLEncode(rdtemp_u,strlen(rdtemp_u),tmpl_enc,98);
	hputs(tmpl_enc,strlen(tmpl_enc),body);
	req.id=pageid;
	req.newtext=body;
	req.time=basetime;
	smartedit(&req,"bot: add redirect template");
	hclose(body);
	return 0;
}
/*
 * Fetch the latest revision of the page(s) named by `ids` and hand each
 * wikitext revision to pagecheck().
 *
 * The XML response is walked with a three-step state machine:
 *   0: wait for <page> with a non-zero numeric pageid attribute
 *   1: wait for <rev> and read its timestamp attribute
 *   2: wait for <slot>, read contentmodel/contentformat, and call
 *      pagecheck() when they are "wikitext" / "text/x-wiki"
 * After a <slot> the machine returns to state 0.
 *
 * Attribute buffers are cleared before each parse so that a missing
 * attribute can never be mistaken for a value left over from an earlier
 * element (or from uninitialised stack memory). A revision without a
 * timestamp is skipped rather than edited without a base timestamp.
 *
 * Returns 0 on completion, -1 when `ids` is NULL, -2 when get() fails,
 * -3 when skipresponseheader() fails.
 */
static int proceedchild(char *ids)
{
	char url[4096];
	char buf[8192];
	char pageid[256]={0};
	char timestamp[256]={0};
	char contentmodel[64]={0},contentformat[64]={0};
	const char *ttm[]={"pageid"};
	const char *tmm[]={"timestamp"};
	const char *cmm[]={"contentmodel","contentformat"};
	char *ttv[1];
	char *tmv[1];
	char *cmv[2];
	int result;
	int status;
	HTTP h;
	ttv[0]=pageid;
	tmv[0]=timestamp;
	cmv[0]=contentmodel;
	cmv[1]=contentformat;
	if(!ids)
	{
		return -1;
	}
	sprintf(url,"https://zh.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&rvprop=content|timestamp&pageids=%s&rvslots=main",ids);
	h=hopen();
	if(get(url,1,h))
	{
		hclose(h);
		return -2;
	}
	if(skipresponseheader(h))
	{
		hclose(h);
		return -3;
	}
	status=0;
	while(!heof(h))
	{
		result=xmlparsetag(h,buf);
		switch(status)
		{
		case 0:
			if(result==XML_HAS_VALUE&&!strcmp(buf,"page"))
			{
				pageid[0]=0;
				if(xmlparsearg(h,1,ttm,ttv)==XML_HAS_VALUE)
				{
					if(atoi(pageid)) status=1;
				}
			}
			break;
		case 1:
			if(result==XML_HAS_VALUE&&!strcmp(buf,"rev"))
			{
				timestamp[0]=0;
				xmlparsearg(h,1,tmm,tmv);
				status=2;
			}
			break;
		case 2:
			if(result==XML_HAS_VALUE&&!strcmp(buf,"slot"))
			{
				contentmodel[0]=0;
				contentformat[0]=0;
				xmlparsearg(h,2,cmm,cmv);
				if(timestamp[0]&&!strcmp(contentmodel,"wikitext")&&!strcmp(contentformat,"text/x-wiki")) pagecheck(pageid,timestamp,h);
				status=0;
			}
			break;
		default:
			break;
		}
	}
	hclose(h);
	return 0;
}
/*
 * Decide whether two UTF-8 titles differ only by what zhconverter()
 * normalises away.
 *
 * Both strings are decoded with utf8tounicode() and run through
 * zhconverter(); the titles match when the converted sequences have the
 * same length and the same elements. (The hant_* buffer names suggest the
 * conversion target is Traditional Chinese - confirm against
 * zhconverter's declaration.)
 *
 * Returns 1 on a match. Returns 0 when the inputs are byte-identical,
 * when either fails to decode (utf8tounicode() result <= 0), or when the
 * converted forms differ.
 */
int judgeredirect(char *a,char *b)
{
	unsigned int a_uni[4096],b_uni[4096];
	unsigned int hant_a[4096],hant_b[4096];
	int a_len,b_len;
	int i;
	if(!strcmp(a,b)) return 0;
	if(utf8tounicode(a,a_uni)<=0) return 0;
	/* The original wrote `b_len=utf8tounicode(...)<=0`, which assigns the
	 * comparison result rather than the length; only the test is needed. */
	if(utf8tounicode(b,b_uni)<=0) return 0;
	a_len=zhconverter(a_uni,hant_a);
	b_len=zhconverter(b_uni,hant_b);
	if(a_len!=b_len) return 0;
	for(i=0;i<a_len;i++)
	{
		if(hant_a[i]!=hant_b[i]) return 0;
	}
	return 1;
}

/*
 * Look up the title of page p->id and, when judgeredirect() says it
 * matches p->title, pass the page on to proceedchild().
 *
 * Returns 0 after a successful lookup (whether or not the page was
 * passed on), -1 when the request or its header could not be read, and
 * -2 when no title attribute was obtained from a <page> element.
 */
static int precheck(struct problemlist *p)
{
	char request[4096];
	char tag[2048];
	char page_title[1024]={0};
	char *attr_names[]={"title"};
	char *attr_values[1];
	HTTP resp;
	attr_values[0]=page_title;
	sprintf(request,"https://zh.wikipedia.org/w/api.php?action=query&format=xml&pageids=%s",p->id);
	resp=hopen();
	if(get(request,1,resp)||skipresponseheader(resp))
	{
		hclose(resp);
		return -1;
	}
	/* Stop at the first <page> tag and read its title attribute. */
	for(;;)
	{
		if(heof(resp)) break;
		xmlparsetag(resp,tag);
		if(strcmp(tag,"page")) continue;
		xmlparsearg(resp,1,attr_names,attr_values);
		break;
	}
	hclose(resp);
	if(page_title[0]=='\0')
	{
		return -2;
	}
	if(judgeredirect(page_title,p->title))
	{
		proceedchild(p->id);
	}
	return 0;
}

void threadfunc(void *c)
{
	int i=*(int *)c;
	int ext=0;
	int result=0;
	struct problemlist *p;
	char *title,*id;
	while(!action) Sleep(1);
	while(1)
	{
		EnterCriticalSection(&hcs);
		if(p=pbl)
		{
			title=pbl->title;
			id=pbl->id;
			pbl=pbl->next;
		}
		else ext=1;
		LeaveCriticalSection(&hcs);
		if(ext) break;
		else
		{
			result=precheck(p);
			if(id) free(id);
			if(title) free(title);
			free(p);
		}
	}
	if(result) exit(-1);
	EnterCriticalSection(&tcs);
	threadnumber--;
	LeaveCriticalSection(&tcs);
	return ;
} 
/*
 * Start up to `count` worker threads running threadfunc() and record in
 * `threadnumber` how many were actually created.
 *
 * `count` is capped at the number of slots in threadc[], because each
 * thread is handed the address of its own slot.
 *
 * _beginthread() returns a uintptr_t handle and reports failure as -1L.
 * The result is therefore compared against that sentinel instead of
 * being narrowed to int and tested with `> 0`, which miscounts valid
 * handles whose truncated value is zero or negative.
 *
 * Always returns 0.
 */
int threadini(int count)
{
	int i;
	int slots=(int)(sizeof threadc/sizeof threadc[0]);
	uintptr_t handle;
	threadnumber=0;
	if(count>slots) count=slots;
	for(i=0;i<count;i++)
	{
		threadc[i]=i;
		handle=_beginthread(threadfunc,0,(void *)(threadc+i));
		if(handle!=(uintptr_t)-1L) threadnumber++;
	}
	return 0;
}
/*
 * Build the global work list `pbl` from list=allredirects for namespace
 * string `ns`.
 *
 * Each <r> element becomes one struct problemlist node, pushed on the
 * front of `pbl`, holding heap copies of its "fromid" and "title"
 * attributes. Requests are repeated with the URL-encoded "arcontinue"
 * value for as long as the response carries a <continue> element.
 *
 * Returns 0 when every batch was read. Returns 1 when `ns` is NULL or too
 * long for the URL buffer, when get() or skipresponseheader() fails, or
 * when memory runs out; nodes already pushed on `pbl` are left in place.
 */
int query(char *ns)
{
	HTTP f;
	char line[2048]={0},url[4096]={0},snd[4096]={0},id[512]={0},title[512]={0},sroffset[2048]={0},offseto[512]={0};
	int next=0;
	struct problemlist *temp=0;
	char *ctm[]={"arcontinue"};
	char *ctv[1];
	char *idm[]={"fromid","title"};
	char *idv[2];
	ctv[0]=offseto;
	idv[0]=id;
	idv[1]=title;
	/* ns comes from the command line; keep url (and snd, which also has
	 * to hold the continuation parameter) from overflowing. */
	if(!ns||strlen(ns)>1024) return 1;
	sprintf(url,"https://zh.wikipedia.org/w/api.php?action=query&format=xml&list=allredirects&arlimit=5000&arnamespace=%s&arprop=title|ids",ns);
	do
	{
		strcpy(snd,url);
		if(next)
		{
			strcat(snd,"&arcontinue=");
			strcat(snd,sroffset);
		}
		f=hopen();
		if(get(snd,1,f))
		{
			hclose(f);
			return 1;
		}
		if(skipresponseheader(f))
		{
			hclose(f);
			return 1;
		}
		next=0;
		do
		{
			xmlparsetag(f,line);
			if(!next)
			{
				if(!strcmp(line,"continue"))
				{
					xmlparsearg(f,1,ctm,ctv);
					URLEncode(offseto,strlen(offseto),sroffset,990);
					next=1;
				}
			}
			if(!strcmp(line,"r"))
			{
				/* Clear first so a missing attribute cannot silently
				 * reuse the previous record's value. */
				id[0]=0;
				title[0]=0;
				xmlparsearg(f,2,idm,idv);
				if(!id[0]) continue;
				temp=(struct problemlist *)malloc(sizeof(struct problemlist));
				if(!temp)
				{
					hclose(f);
					return 1;
				}
				temp->title=(char *)malloc(strlen(title)+1);
				temp->id=(char *)malloc(strlen(id)+1);
				if(!temp->title||!temp->id)
				{
					free(temp->title);
					free(temp->id);
					free(temp);
					hclose(f);
					return 1;
				}
				strcpy(temp->title,title);
				strcpy(temp->id,id);
				temp->next=pbl;
				pbl=temp;
			}
		}while(!heof(f));
		hclose(f);
	}while(next);
	return 0;
}
#ifdef DEBUG_MODE
/*
 * Debug entry point (built only when DEBUG_MODE is defined): loads the
 * conversion table from a fixed local path, prints the converterini()
 * result, then prints what judgeredirect() returns for one sample pair.
 */
int main(void)
{
	/* Backslashes in the Windows path must be doubled; "\A", "\h", "\w",
	 * "\i" and "\Z" are not valid escape sequences. */
	printf("%d\n",converterini("D:\\Apache24\\htdocs\\w\\includes\\ZhConversion.php"));
	printf("res=%d\n",judgeredirect(G2U("树"),G2U("樹")));/* another sample pair to try: 紀念巡迴演唱會 / 纪念巡回演唱会 */
	return 0;
}
#else
/*
 * Program entry point.
 *
 * Sequence: parse the command line, load the conversion table, log in,
 * start the tokenmanage thread, build the work list with query(), start
 * the workers, wait for the list to drain, then give the workers up to 60
 * one-second polls to finish before reporting and pausing.
 *
 * Exit codes: 0 normal, -1 bad arguments, -2 conversion table load
 * failure, -3 login failure, -4 empty work list.
 */
int main(int argc,char *argv[])
{
	int seconds_waited;
	int work_left;
	if(parsearg(argc,argv)!=0)
	{
		printf("usage: -u username -p passwd -F conversion_table_file [-T concurrency -n namespace]\n");
		return -1;
	}
	if(converterini(convertion_table)!=0)
	{
		printf("Load conversion table error!\n");
		return -2;
	}
	rdtemp_u=G2U("{{简繁重定向}}");
	InitializeCriticalSection(&cs);
	InitializeCriticalSection(&tcs);
	InitializeCriticalSection(&hcs);
	buckini(20);
	if(login(username,passwd)!=0)
	{
		printf("Login error!\n");
		return -3;
	}
	hastoken=0;
	printf("Login complete.\n");
	fflush(stdout);
	_beginthread(tokenmanage,0,0);
	query(ns);
	if(!pbl)
	{
		printf("No page!\n");
		return -4;
	}
	printf("Query complete.\n");
	/* Workers are created while action==0 and released together. */
	action=0;
	threadini(maxthread);
	action=1;
	/* Poll once a second until the work list has been emptied. */
	do
	{
		EnterCriticalSection(&hcs);
		work_left=(pbl!=NULL);
		LeaveCriticalSection(&hcs);
		if(work_left) Sleep(1000);
	}while(work_left);
	/* Give the workers up to 60 polls to finish their last item. */
	for(seconds_waited=0;seconds_waited<60;seconds_waited++)
	{
		int still_running;
		EnterCriticalSection(&tcs);
		still_running=(threadnumber>0);
		LeaveCriticalSection(&tcs);
		if(!still_running) break;
		printf("Waiting for all threads to exit. Current thread number: %d\n",threadnumber);
		fflush(stdout);
		Sleep(1000);
	}
	if(threadnumber!=0)
	{
		printf("%d threads left.\n",threadnumber);
	}
	else
	{
		/* Tear down shared state only once every worker has left. */
		DeleteCriticalSection(&tcs);
		DeleteCriticalSection(&hcs);
		hastoken=-1;
		buckdestroy();
	}
	printf("---------------Ok done.---------------\n");
	fflush(stdout);
	system("PAUSE");
	DeleteCriticalSection(&cs);
	return 0;
}
#endif