早期上网经常需要使用代理服务器,现在用的比较少了,大家更耳熟能详的反而是“反向代理”如Nginx。
代理服务器一般用作局域网上网,而反向代理则是把来自互联网的连接转发到局域网上,作用刚好相反。
HTTP协议自身就带有对代理服务器的支持。HTTP协议目前主要有多个版本,0.9太简单,基本不见了,1.0只支持一个连接一个请求,1.1则支持长连接,2.0极大复杂化了传输过程,支持多路复用。协议版本这么多,但是代理服务器作为中间商,可以选择一个较低的版本,用户的客户端和服务器一般都有能力适应多个版本。
代理服务器可以选择比较简单的HTTP1.0版本,一个连接就是一个请求,只需要在连接建立之后做处理,处理完请求就是简单的数据转发了。
HTTP1.0协议对代理服务器的支持基本就是两点:
- 请求行使用绝对URL
- 专用于代理服务器的Proxy-XXXX头标
代理服务器要做的事情是:
- 取出请求行的服务器域名和端口并擦除(擦除后与直接请求的请求行相同)
- 将协议版本降低为自己支持的版本
- 根据Proxy-XXXX头标处理并擦除
- 像直接请求一样访问服务器
- 转发数据给用户
前面说的“擦除”是把后面的数据前移而不是设置为空格,设置为空格并不符合HTTP协议,服务器一般不能理解。
原则上代理服务器可以支持客户端和服务器使用不同的协议版本,比如客户端是1.0而服务器是1.1,但这将极大地增加程序复杂度。
虽然HTTP的BODY与代理服务器处理无关,只需要接收完头部就可以处理,但是最好整个请求完整发送,因为有些服务器不能处理请求头和BODY分开的情形。
代理服务器认证
代理服务器通过Proxy-XXXX头标进行认证,这个认证是代理服务器的认证而不是用户要访问的服务器的认证。代理服务器认证完后就应该删除这些头标,因为这些头标对目标服务器毫无意义。
隧道请求CONNECT
CONNECT是个不常用的请求方法,专门用于代理。代理服务器取得目标服务器后直接连上去就可以了,然后就是双向转发数据。
代码示例
下面的代码就是一个HTTP1.0代理的协议处理部分的代码,没有认证(因为用的是IP地址认证,在进入这个代码之前就已经处理过了):
// servicethreadhttp.cpp
#include "stdafx.h"
#include "mystd.h"
#include "Proxy.h"
#include "httpresp.h"

// Application object defined elsewhere; this file reads theApp.bandata.
extern CProxyApp theApp;

// HTTP protocol handling thread
DWORD ServiceThreadHttp(LPDWORD lpdwParam)
{
//--线程参数处理------------------------int cdindex;//连接数据索引struct ServiceData * servicedata;cdindex=((struct ThreadUserData *)lpdwParam)->index;servicedata=((struct ThreadUserData *)lpdwParam)->servicedata;
//--------------------------------------struct ConnectionData * cd;struct LogStruct * logs;cd=&servicedata->connectiondataarray.pconnectiondata[cdindex];if(-1!=cd->log){logs=servicedata->memlogfile.logstruct+cd->log;}else{logs=NULL;}
//----------------------------------------struct ConfigInfo * pci;pci=&servicedata->serviceconfigfiledata.configarray[cd->serviceindex];int headlen;int port;char host[256];char uri[256];unsigned long addr;SOCKADDR_IN sa;BOOL isTunnel=FALSE;//是否是隧道请求char tunnelresponse[]="HTTP/1.0 200 Connection established\x0d\x0a""Proxy-agent: FreeProxy 1.0\x0d\x0a\x0d\x0a";//退出?if(CONNECTIONDATA_CMD_QUIT==cd->cmd){closesocket(cd->sdc.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-1;}//接收请求cd->sdc.bufcount=RecvHttpRequest(cd->sdc.s,cd->sdc.buf,BUFFERSIZE,&cd->cmd,&headlen,pci->islimitpost,1000*pci->maxpost);if(0>cd->sdc.bufcount){//DebugMessage("RecvHttpRequest失败");closesocket(cd->sdc.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-1;}//分析请求
/* char tracertfile[256];if(-1!=mymemindex(cd->sdc.buf,cd->sdc.bufcount,"says=%2Fnick",strlen("says=%2Fnick"))){strcpy(tracertfile,"tracert_");itoa(cdindex,tracertfile+strlen(tracertfile),10);if(-1!=cd->log)WriteTracertFile(tracertfile,logs->username,strlen(logs->username));WriteTracertFile(tracertfile,cd->sdc.buf,cd->sdc.bufcount);}*/if(0>GetHttpURL(cd->sdc.buf,&cd->sdc.bufcount,headlen+4,host,256,&port,uri,256)){if(pci->isenableconnect && 0<=GetTunnelURL(cd->sdc.buf,&cd->sdc.bufcount,headlen+4,host,256,&port,uri,256)){//是隧道请求isTunnel=TRUE;if(-1!=cd->log){strcpy(logs->domainname,host);}}else{send(cd->sdc.s,httpresp400,strlen(httpresp400),0);closesocket(cd->sdc.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-2;}}else{if(-1!=cd->log){strcpy(logs->domainname,host);}}ClearProxyInfo(cd->sdc.buf,&cd->sdc.bufcount);//检查目标许可if(IsForbidden(&theApp.bandata,host,uri)){send(cd->sdc.s,httpresp403,strlen(httpresp403),0);closesocket(cd->sdc.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-1;}//退出?if(CONNECTIONDATA_CMD_QUIT==cd->cmd){closesocket(cd->sdc.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-1;}//记录日志,计时开始if(-1!=cd->log){time(&logs->timestart);}//域名解析if(1!=GetAddrByHost(addr,host)){send(cd->sdc.s,httpresp600,strlen(httpresp600),0);closesocket(cd->sdc.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-3;}memcpy(&(sa.sin_addr.S_un.S_addr),&addr,4);sa.sin_family=AF_INET;sa.sin_port=htons((unsigned short)port);//建立SOCKETif(INVALID_SOCKET==(cd->sdr.s=socket(AF_INET,SOCK_STREAM,0))){send(cd->sdc.s,httpresp601,strlen(httpresp601),0);closesocket(cd->sdc.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return 
(DWORD)-4;}//退出?if(CONNECTIONDATA_CMD_QUIT==cd->cmd){closesocket(cd->sdc.s);closesocket(cd->sdr.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-1;}//连接if(SOCKET_ERROR==connect(cd->sdr.s,(struct sockaddr *)&sa,sizeof(sa))){send(cd->sdc.s,httpresp602,strlen(httpresp602),0);closesocket(cd->sdc.s);closesocket(cd->sdr.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-5;}else{if(-1!=cd->log){strcpy(logs->domainname,uri);}}//退出?if(CONNECTIONDATA_CMD_QUIT==cd->cmd){closesocket(cd->sdc.s);closesocket(cd->sdr.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-1;}//发送请求if(isTunnel){if(SOCKET_ERROR==send(cd->sdc.s,tunnelresponse,strlen(tunnelresponse),0)){send(cd->sdc.s,httpresp603,strlen(httpresp603),0);closesocket(cd->sdc.s);closesocket(cd->sdr.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-6;}}if(SOCKET_ERROR==send(cd->sdr.s,cd->sdc.buf,cd->sdc.bufcount,0)){send(cd->sdc.s,httpresp603,strlen(httpresp603),0);closesocket(cd->sdc.s);closesocket(cd->sdr.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-6;}//记录字节数if(-1!=cd->log){logs->bytecount+=cd->sdc.bufcount;}///TraceData(servicedata->isDataTrace,&servicedata->memlogfile.logdatatrace[cd->log].dc,cd->sdc.buf,cd->sdc.bufcount);//退出?if(CONNECTIONDATA_CMD_QUIT==cd->cmd){closesocket(cd->sdc.s);closesocket(cd->sdr.s);if(-1!=cd->log){logs->state=LOGSTRUCT_STATE_NOUSE;}cd->state=CONNECTION_NOUSE;return (DWORD)-1;}//接收数据并发给客户TransData(cd->sdr.s,cd->sdc.s,cd->sdr.buf,BUFFERSIZE,&cd->cmd,&cd->sdr.bufcount,servicedata,cd);//记录字节数if(-1!=cd->log){logs->bytecount+=cd->sdr.bufcount;}closesocket(cd->sdc.s);closesocket(cd->sdr.s);if(-1!=cd->log){time(&logs->timeend);logs->state=LOGSTRUCT_STATE_USED;}cd->state=CONNECTION_NOUSE;return 1;
}//接收HTTP请求(如果出错,不执行closesocket())
// Reads from socket s into buf until the end of the HTTP header (CRLFCRLF)
// has been received. The socket is never closed here, not even on error.
//
//   s            connected client socket
//   buf, buflen  receive buffer and its capacity
//   cmd          polled before every wait; CONNECTIONDATA_CMD_QUIT aborts
//   headlen      out: offset of the CRLFCRLF that ends the header
//   islimitpost  when TRUE the Content-Length header is checked
//   maxpost      largest accepted Content-Length (the check rejects values
//                greater than maxpost + 1, as the original code did)
//
// Returns the number of bytes received (header plus whatever part of the
// body arrived with it), or a negative code:
//   -1 quit requested            -2 IsSocketReadReady() failed
//   -3 recv() failed             -4 peer closed, or buffer full before the
//                                   header was complete
//   -5 Content-Length line has no CRLF
//   -6 Content-Length value longer than 9 characters
//   -7 Content-Length exceeds the limit
//
// NOTE(review): the header name is matched case-sensitively, so a client
// sending "content-length:" is not subject to the limit.
int RecvHttpRequest(SOCKET s,char * buf,int buflen,int * cmd,int* headlen,BOOL islimitpost,int maxpost)
{
	const char CRLF[] = "\x0d\x0a";
	const char CRLFCRLF[] = "\x0d\x0a\x0d\x0a";
	const char CONTENTLENGTH[] = "Content-Length:";
	const int namelen = (int)strlen(CONTENTLENGTH);
	int recvcount = 0;
	int recvall = 0;
	int pos;
	BOOL readable = FALSE;
	struct timeval timeout;

	maxpost += 1;
	timeout.tv_sec = 0;
	timeout.tv_usec = 100000; // poll the quit flag every 100 ms

	for (;;)
	{
		// Quit requested?
		if (CONNECTIONDATA_CMD_QUIT == *cmd)
		{
			return -1;
		}
		if (1 != IsSocketReadReady(s, timeout, readable))
		{
			return -2;
		}
		if (!readable)
		{
			continue;
		}
		recvcount = recv(s, buf + recvall, buflen - recvall, 0);
		if (SOCKET_ERROR == recvcount)
		{
			return -3;
		}
		else if (0 == recvcount)
		{
			return -4;
		}
		recvall += recvcount;

		// The Content-Length check below relies on the complete header, so
		// wait for CRLFCRLF (4), not just the first CRLF (2).
		pos = mymemindex(buf, recvall, (char*)CRLFCRLF, 4);
		if (-1 != pos)
		{
			*headlen = pos;
			break;
		}

		// Buffer exhausted without a complete header: fail now instead of
		// waiting for more data and issuing a zero-length recv().
		if (recvall >= buflen)
		{
			return -4;
		}
	}

	if (islimitpost && -1 != (pos = mymemindex(buf, *headlen, (char*)CONTENTLENGTH, namelen)))
	{
		long vallen;
		long eol;
		char len[10];

		// Search only inside the received header (up to and including the
		// first CRLF of the terminating CRLFCRLF), never in unreceived bytes.
		eol = mymemindex(buf + pos, *headlen + 2 - pos, (char*)CRLF, 2);
		if (-1 == eol)
		{
			return -5;
		}
		vallen = eol - namelen;
		if (vallen < 0)
		{
			return -5;
		}
		if (vallen > 9)
		{
			return -6;
		}
		memcpy(len, buf + pos + namelen, vallen);
		len[vallen] = '\0';
		if (atoi(len) > maxpost)
		{
			return -7;
		}
	}

	return recvall;
}

// Get the URL
// Parses a proxy-style request line ("GET http://host[:port]/path HTTP/1.x")
// and rewrites the request in place into the form sent to an origin server.
//
//   buf         request buffer, modified in place on success
//   buflenall   in/out: number of valid bytes in buf
//   buflen      number of bytes to inspect (header incl. CRLFCRLF)
//   host        out: host name, NUL-terminated, at most hostbuflen-1 chars
//   port        out: port from the URL, 80 when absent or empty
//   uri         out: text from "http://" to the end of the request line,
//               truncated to uribuflen-1 characters
//
// On success the "http://host[:port]" part is erased (following bytes are
// moved forward), "HTTP/1.1" at the end of the request line becomes
// "HTTP/1.0", and the host length is returned.
// On failure buf and *buflenall are left untouched and one of these is
// returned:
//   -2 no request line / no "http://" in it
//   -3 no '/' after the host
//   -4 port is not a decimal number in 1..65535
//   -5 host does not fit into hostbuflen
int GetHttpURL(char* buf,int * buflenall,int buflen,char * host,int hostbuflen,int * port,char * uri,int uribuflen)
{
	const char CRLF[] = "\x0d\x0a";
	int urlstart, urlend;
	int hoststart, hostend, hostlen;
	int portstart, portend, portlen;
	int pos;
	int k;
	int erased;

	urlend = mymemindex(buf, buflen, (char*)CRLF, 2);
	if (-1 == urlend)
	{
		return -2;
	}
	if (-1 == (urlstart = mymemindex(buf, urlend, "http://", 7)))
	{
		return -2;
	}

	// Keep a copy of the URL (through the end of the request line)
	if (urlend - urlstart >= uribuflen)
	{
		memcpy(uri, buf + urlstart, uribuflen - 1);
		uri[uribuflen - 1] = '\0';
	}
	else
	{
		memcpy(uri, buf + urlstart, urlend - urlstart);
		uri[urlend - urlstart] = '\0';
	}

	// Host starts right after "http://" and ends at the first '/'
	hoststart = urlstart + 7;
	if (-1 == (pos = mymemindex(buf + hoststart, urlend - hoststart, "/", 1)))
	{
		return -3;
	}
	portend = pos + hoststart;

	pos = mymemindex(buf + hoststart, portend - hoststart, ":", 1);
	if (-1 != pos) // a ':' is present
	{
		portstart = pos + hoststart + 1;
		hostend = pos + hoststart;
		portlen = portend - portstart;
		if (0 == portlen)
		{
			// "host:" with nothing after the colon means no port was given
			*port = 80;
		}
		else
		{
			// Parse the digits directly; the length comes from the client
			// and must not be trusted to fit any fixed-size scratch buffer.
			int value = 0;
			if (portlen > 5)
			{
				return -4;
			}
			for (k = 0; k < portlen; k++)
			{
				char c = buf[portstart + k];
				if (c < '0' || c > '9')
				{
					return -4;
				}
				value = value * 10 + (c - '0');
			}
			if (value < 1 || value > 65535)
			{
				return -4;
			}
			*port = value;
		}
	}
	else // no port
	{
		*port = 80;
		hostend = portend;
	}

	hostlen = hostend - hoststart;
	if (hostlen >= hostbuflen)
	{
		return -5;
	}
	memcpy(host, buf + hoststart, hostlen);
	host[hostlen] = '\0';

	// --- rewrite the request; nothing below can fail ---

	// Downgrade version 1.1 to 1.0, but only when the request line really
	// ends in "HTTP/1.1" (a path ending in '1' must not be altered).
	if (urlend - urlstart >= 8 && 0 == memcmp(buf + urlend - 8, "HTTP/1.1", 8))
	{
		buf[urlend - 1] = '0';
	}

	// Erase "http://host[:port]" by moving the rest of the request forward
	erased = portend - urlstart;
	memmove(buf + urlstart, buf + portend, *buflenall - portend);
	*buflenall -= erased;

	return hostlen;
}

// Get the tunnel request
// Parses a tunnel request line ("CONNECT host[:port] HTTP/1.x").
//
//   buf         request buffer (not modified)
//   buflenall   out: set to 0 on success, because nothing of a CONNECT
//               request is forwarded to the target server
//   buflen      number of bytes to inspect (header incl. CRLFCRLF)
//   host        out: host name, NUL-terminated, at most hostbuflen-1 chars
//   port        out: port of the target, 80 when absent or empty
//   uri         out: the request line, truncated to uribuflen-1 characters
//
// The target ends at the first space or '/' after it (or at the end of the
// request line), so the "HTTP/1.x" suffix is never treated as part of the
// host or port.
//
// Returns the host length on success, otherwise (with *buflenall untouched):
//   -2 not a CONNECT request line
//   -3 empty target
//   -4 port is not a decimal number in 1..65535
//   -5 host does not fit into hostbuflen
int GetTunnelURL(char* buf,int * buflenall,int buflen,char * host,int hostbuflen,int * port,char * uri,int uribuflen)
{
	const char CRLF[] = "\x0d\x0a";
	int urlstart, urlend;
	int hoststart, hostend, hostlen;
	int portstart, portend, portlen;
	int pos;
	int k;

	urlend = mymemindex(buf, buflen, (char*)CRLF, 2);
	if (buflen < 8 || 0 != memcmp(buf, "CONNECT", 7))
	{
		return -2;
	}
	if (' ' != buf[7])
	{
		return -2;
	}
	if (urlend < 8)
	{
		// No CRLF found (-1): there is no request line to parse
		return -2;
	}

	// Skip the blanks between the method and the target
	for (urlstart = 8; urlstart < urlend; urlstart++)
	{
		if (' ' != buf[urlstart])
		{
			break;
		}
	}

	// Keep a copy of the whole request line
	if (urlend >= uribuflen)
	{
		memcpy(uri, buf, uribuflen - 1);
		uri[uribuflen - 1] = '\0';
	}
	else
	{
		memcpy(uri, buf, urlend);
		uri[urlend] = '\0';
	}

	// The target runs up to the first blank or '/', or the end of the line
	hoststart = urlstart;
	for (portend = hoststart; portend < urlend; portend++)
	{
		if (' ' == buf[portend] || '/' == buf[portend])
		{
			break;
		}
	}
	if (portend == hoststart)
	{
		return -3;
	}

	pos = mymemindex(buf + hoststart, portend - hoststart, ":", 1);
	if (-1 != pos) // a ':' is present
	{
		portstart = pos + hoststart + 1;
		hostend = pos + hoststart;
		portlen = portend - portstart;
		if (0 == portlen)
		{
			// "host:" with nothing after the colon means no port was given
			*port = 80;
		}
		else
		{
			// Parse the digits directly; the length comes from the client
			// and must not be trusted to fit any fixed-size scratch buffer.
			int value = 0;
			if (portlen > 5)
			{
				return -4;
			}
			for (k = 0; k < portlen; k++)
			{
				char c = buf[portstart + k];
				if (c < '0' || c > '9')
				{
					return -4;
				}
				value = value * 10 + (c - '0');
			}
			if (value < 1 || value > 65535)
			{
				return -4;
			}
			*port = value;
		}
	}
	else // no port
	{
		*port = 80;
		hostend = portend;
	}

	hostlen = hostend - hoststart;
	if (hostlen >= hostbuflen)
	{
		return -5;
	}
	memcpy(host, buf + hoststart, hostlen);
	host[hostlen] = '\0';

	// Nothing of the CONNECT request itself goes to the target server
	*buflenall = 0;

	return hostlen;
}

// Clear proxy information
// Removes the first "Proxy-Connection" header line from the request held in
// buf (*buflenall valid bytes) and shrinks *buflenall accordingly.
//
// The header is looked for only in the header section (before the first
// CRLFCRLF) so that identical text inside a request body is left alone; when
// no CRLFCRLF is present the whole buffer is searched. The match must be
// directly preceded by CRLF. What is erased is that CRLF plus the header
// line up to, but not including, its own terminating CRLF.
//
// Always returns 1.
//
// NOTE(review): only Proxy-Connection is stripped; other Proxy-* headers
// (e.g. Proxy-Authorization) are forwarded unchanged.
int ClearProxyInfo(char * buf,int * buflenall)
{
	const char PROXYCONNECTION[] = "Proxy-Connection";
	const char CRLF[] = "\x0d\x0a";
	const char CRLFCRLF[] = "\x0d\x0a\x0d\x0a";
	const int namelen = (int)strlen(PROXYCONNECTION);
	int searchlen;
	int headend;
	int namepos;
	int valuelen;
	int tailstart;

	// Restrict the search to the header section when its end is known
	searchlen = *buflenall;
	headend = mymemindex(buf, *buflenall, CRLFCRLF, 4);
	if (-1 != headend)
	{
		searchlen = headend;
	}

	// Not found (-1), or too close to the start to be preceded by a CRLF
	if (2 > (namepos = mymemindex(buf, searchlen, PROXYCONNECTION, namelen)))
	{
		return 1;
	}
	// The name must start a header line
	if (0 != memcmp(buf + namepos - 2, CRLF, 2))
	{
		return 1;
	}

	// Length of the rest of the line after the header name
	valuelen = mymemindex(buf + namepos + namelen, (*buflenall) - namepos - namelen, CRLF, 2);
	if (-1 == valuelen)
	{
		valuelen = (*buflenall) - namepos - namelen;
	}

	// Erase the proxy header by moving the remainder forward
	tailstart = namepos + namelen + valuelen;
	memmove(buf + namepos - 2, buf + tailstart, (*buflenall) - tailstart);
	*buflenall -= 2 + namelen + valuelen;

	return 1;
}
主要就是这么几件事:取出目标地址和端口,擦除目标信息,降低版本为1.0,擦除Proxy-XXXX头标,连接目标,双向转发数据。
这个代码是从实际项目中截取出来的。
(这里是结束)