Python开发Http代理服务器 - socketref,再见!高德 - C++博客
Python开发Http代理服务器
这是之前开发酒店广告投放系统时编写的Http代理服务程序,功能是把广告插播进Html DOM结构内。一般被插入的是DIV,当然也包括script,这些内容都可以从数据库中动态获得。
简单修改之后即可当做Http代理服务器程序使用:在浏览器中把Http代理设置为运行该程序的机器的IP即可,只要运行代理程序的机器能上网,客户机便能上网(其中涉及页面gzip压缩的处理有点麻烦)。
# -*- coding:utf-8 -*-
# HTTP proxy server
# 1. IP restriction, MAC restriction
#
# socketref@hotmail.com
# www.sw2us.com

# NOTE(review): presumably the second line of a "#!/bin/sh" polyglot launcher;
# no shebang is present in this file, so to Python this is only a string
# expression with no effect.
"exec" "python" "-O" "$0" "$@"

__doc__ = """sw2us HTTP Proxy.

"""

__version__ = "0.2.1"
15
import BaseHTTPServer
import SocketServer
import StringIO
import gzip
import httplib
import mimetools
import os
import re
import select
import socket
import sys
import time
import traceback
import urlparse
import zlib
19
20
class ConfigProperty:
    """One ``key=value`` entry of a simple configuration file."""

    def __init__(self, owner):
        """Create an empty property.

        owner -- the object creating this property; accepted for interface
                 compatibility and not used.
        """
        self.key = ''
        self.value = ''

    def create(self, text):
        """Parse one ``key=value`` line into this property.

        Everything from the first ``#`` on is treated as a comment and
        dropped.  The line is split on the FIRST ``=`` only, so values may
        themselves contain ``=``.  Key and value are stripped of surrounding
        whitespace.

        Returns True when the line held a ``=`` separator (key/value are then
        updated), False otherwise (key/value are left untouched).
        """
        pos = text.find('#')
        if pos != -1:
            text = text[:pos]
        key, sep, value = text.partition('=')
        if not sep:
            return False
        self.key = key.strip()
        self.value = value.strip()
        return True

    def toString(self):
        """Return the property as ``key=value``, or '' if formatting fails."""
        try:
            return "%s=%s" % (self.key, self.value)
        except Exception:
            return ''

    def toInt(self):
        """Return the value as an int, or 0 when it is not a valid integer."""
        try:
            return int(self.value)
        except (TypeError, ValueError):
            return 0

    def toFloat(self):
        """Return the value as a float, or 0.0 when it is not a valid number."""
        try:
            return float(self.value)
        except (TypeError, ValueError):
            return 0.0
66
67
# Simple configuration file; basic line format: key=value
class SimpleConfig:
    """Ordered collection of ``key=value`` properties read from a text file."""

    def __init__(self):
        self._file = ''
        self._props = []
        self._strip = True

    def open(self, file, strip=True):
        """Load properties from *file*, replacing any previously loaded ones.

        file  -- path of the configuration file.
        strip -- intended to control trimming of invisible leading/trailing
                 characters; it is only stored in ``_strip`` here.

        Lines that do not parse as ``key=value`` are skipped.  Returns True on
        success, False when the file cannot be read (the property list is
        then empty).
        """
        self._strip = strip
        self._props = []
        try:
            with open(file, 'r') as fh:
                lines = fh.readlines()
        except (IOError, OSError, ValueError):
            return False
        for text in lines:
            prop = ConfigProperty(self)
            if prop.create(text):
                self._props.append(prop)
        return True

    def toString(self):
        """Return all properties as ``key=value`` lines, each ending in a newline."""
        return ''.join([p.toString() + "\n" for p in self._props])

    def saveAs(self, file):
        """Write all properties to *file*; return True on success, False on error."""
        try:
            with open(file, 'w') as fh:
                fh.write(self.toString())
        except (IOError, OSError):
            print("write File Failed!")
            return False
        return True

    def getProperty(self, name):
        """Return the first property whose key equals *name*, or None."""
        for p in self._props:
            if p.key == name:
                return p
        return None

    def getPropertyValue(self, key, default=''):
        """Return the string value stored under *key*, or *default* if absent."""
        prop = self.getProperty(key)
        if prop is None:
            return default
        return prop.value

    def getPropertyValueAsInt(self, name, default=0):
        """Return the value under *name* as int; *default* if absent, empty or invalid."""
        text = self.getPropertyValue(name)
        if not text:
            return default
        try:
            return int(text)
        except (TypeError, ValueError):
            return default

    def getPropertyValueAsFloat(self, name, default=0.0):
        """Return the value under *name* as float; *default* if absent, empty or invalid."""
        text = self.getPropertyValue(name)
        if not text:
            return default
        try:
            return float(text)
        except (TypeError, ValueError):
            return default
151
152
153#===========================================#
154
155
156#===========================================#
157
def getMacList(lines=None):
    """Return ``(ip, mac)`` tuples parsed from ARP table text.

    lines -- optional iterable of text lines laid out as
             ``<dotted IPv4> <whitespace> <xx-xx-xx-xx-xx-xx> ...``; when
             omitted, the command ``arp -a`` is run and its output is parsed.

    Only lines that start (after stripping) with a dotted IPv4 address
    followed by a dash-separated MAC address are kept; all other lines are
    ignored.
    """
    # Dots are escaped so that only a literal '.' separates the IP octets.
    pattern = re.compile(
        r'^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+'
        r'((?:[0-9a-fA-F]{1,2}-){5}[0-9a-fA-F]{1,2})')
    pipe = None
    if lines is None:
        pipe = os.popen('arp -a', 'r')
        lines = pipe
    maclist = []
    try:
        for line in lines:
            rst = pattern.match(line.strip())
            if rst:
                maclist.append(rst.groups())
    finally:
        # Close the pipe we opened ourselves; caller-supplied input is left alone.
        if pipe is not None:
            pipe.close()
    return maclist
173
174
175
176##########################################
# Module-wide configuration, read from 'proxy.conf' relative to the current
# working directory.  open() returns False (leaving the configuration empty)
# when the file cannot be read; that result is deliberately not checked here.
confile = SimpleConfig()
confile.open('proxy.conf')
# Placeholder for a database connection; nothing visible in this file assigns it.
dbconn = None
180
181##########################################
def initConfiguration():
    """Initialise the system configuration.

    Currently a placeholder: it performs no work and always returns True.
    """
    return True
187
188##########################################
189
class ProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Forwarding HTTP proxy request handler.

    Plain HTTP methods (GET/HEAD/POST/PUT/DELETE) are relayed by do_GET;
    CONNECT opens a raw byte tunnel.  When the class attribute
    ``allowed_clients`` exists, clients whose IP is not in it get a 403.
    """

    # Private aliases so handle() can fall through to the stock implementation.
    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle
    server_version = "TinyHTTPProxy/" + __version__
    # self.rfile is unbuffered -- presumably so no request bytes sit in a
    # buffer while _read_write() reads the raw socket directly.
    rbufsize = 0

    def handle(self):
        """Entry point for one client connection.

        With ThreadingHTTPServer this runs in the connection's own thread.
        """
        print('client incoming')
        ip = self.client_address[0]
        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
            # Still read and parse the request so a proper 403 can be sent.
            self.raw_requestline = self.rfile.readline()
            if self.parse_request():
                self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        """Connect *soc* to ``host[:port]`` given in *netloc* (port defaults to 80).

        Returns 1 on success.  On failure an error response is sent to the
        client (400 for a non-numeric port, 404 for a connection error) and 0
        is returned.
        """
        i = netloc.find(':')
        if i >= 0:
            try:
                host_port = netloc[:i], int(netloc[i + 1:])
            except ValueError:
                self.send_error(400, "bad port in %s" % netloc)
                return 0
        else:
            host_port = netloc, 80
        try:
            soc.connect(host_port)
        except socket.error as arg:
            # Prefer the textual part of an (errno, message) error.
            details = getattr(arg, 'args', ())
            if len(details) > 1:
                msg = details[1]
            else:
                msg = str(arg)
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        """Handle CONNECT: open a tunnel to ``self.path`` and relay bytes both ways."""
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                # The space after the protocol version is required for a
                # well-formed status line.
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                self._read_write(soc, 300)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def do_GET(self):
        """Relay a plain-HTTP request to the origin server and stream the reply back.

        Also bound to HEAD, POST, PUT and DELETE below.  Answers 400 when the
        URL is not http, carries a fragment, or names no host.
        """
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if not netloc:
            netloc = self.headers.get('Host', "")
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                # Request line uses the origin-form (path only, no scheme/host).
                request = ["%s %s %s\r\n" % (
                    self.command,
                    urlparse.urlunparse(('', '', path, params, query, '')),
                    self.request_version)]
                self.headers['Connection'] = 'close'
                del self.headers['Proxy-Connection']
                for key_val in self.headers.items():
                    request.append("%s: %s\r\n" % key_val)
                request.append("\r\n")
                # sendall: a plain send() may transmit only part of the data.
                soc.sendall(''.join(request))
                # Request line and headers are out; relay the remaining traffic.
                self._read_write(soc)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def insertTags(self, tag, body, insert):
        """Return *body* with *insert* placed right after the first ``<tag ...>``.

        The opening tag is located by searching for ``'<' + tag`` and the next
        ``'>'``; *body* is returned unchanged when either is missing.  Note
        that the search is a prefix match (``head`` also matches ``<header``).
        """
        p1 = body.find('<%s' % tag)
        if p1 != -1:
            p2 = body.find('>', p1)
            if p2 != -1:
                print('-' * 20)
                body = body[:p2 + 1] + insert + body[p2 + 1:]
        return body

    def _header_equals(self, headers, name, expected):
        """True when header *name* exists and equals *expected* (case/space-insensitive)."""
        return name in headers and headers[name].strip().lower() == expected

    def _join_chunks(self, body):
        """Return the payload of a chunked-transfer-encoded *body*.

        Raises ValueError when a chunk-size line is missing or not hexadecimal.
        """
        parts = []
        pos = 0
        while pos < len(body):
            eol = body.find('\r\n', pos)
            if eol == -1:
                raise ValueError('truncated chunk header')
            # A chunk-size line may carry ';extension' text after the size.
            size = int(body[pos:eol].split(';', 1)[0], 16)
            if size == 0:
                break
            start = eol + 2
            parts.append(body[start:start + size])
            pos = start + size + 2  # skip the CRLF that ends the chunk data
        return ''.join(parts)

    # Responses are frequently gzip-compressed (content-encoding: gzip), either
    # directly after the headers or wrapped in chunked transfer encoding.  The
    # body must be restored to plain data before anything can be inserted, and
    # Content-Length must be recomputed before replying to the browser.
    def sendBackResponse(self, command, headers, body):
        """Send a (possibly rewritten) response to the client.

        command -- status line without the trailing CRLF (one is appended).
        headers -- mutable mapping of header names to values; assumed to use
                   lower-case names, as the lookups here do -- TODO confirm
                   against the caller.
        body    -- raw response body as received from the origin server.

        When the body can be fully decoded (chunk framing removed, gzip
        inflated) the matching headers are dropped, the optional
        ``publish_advertisement`` hook is applied and Content-Length is
        recomputed.  If decoding fails, the response is forwarded exactly as
        received so headers and body stay consistent.
        """
        chunked = self._header_equals(headers, 'transfer-encoding', 'chunked')
        gzipped = self._header_equals(headers, 'content-encoding', 'gzip')
        try:
            plain = body
            if chunked:
                plain = self._join_chunks(plain)
            if gzipped:
                plain = gzip.GzipFile(fileobj=StringIO.StringIO(plain)).read()
        except Exception:
            traceback.print_exc()
            print('decompress failed!')
        else:
            body = plain
            if chunked:
                del headers['transfer-encoding']
            if gzipped:
                del headers['content-encoding']

            # Insert the configured advertisement, if such a hook is defined
            # (it is not defined in the visible part of this file).
            publish = getattr(self, 'publish_advertisement', None)
            if publish is not None:
                try:
                    body = publish(body)
                except Exception:
                    # Best effort: on failure send the page without the insert.
                    traceback.print_exc()

            # Replace any existing length header, whatever its letter case,
            # so only one Content-Length is sent.
            for name in [k for k in headers.keys()
                         if k.lower() == 'content-length']:
                del headers[name]
            headers['Content-Length'] = str(len(body))

        head = [command, '\r\n']
        for k, v in headers.items():
            head.append("%s: %s\r\n" % (k, v))
        head.append("\r\n")
        self.connection.sendall(''.join(head))
        self.connection.sendall(body)

    def _read_write(self, soc, max_idling=20):
        """Relay bytes between the client connection and *soc* until done.

        soc        -- socket connected to the remote server.
        max_idling -- number of consecutive 3-second select() rounds without
                      traffic after which the relay gives up (20 * 3 = 60 s).

        Returns when either side closes, select() reports an exceptional
        condition, or the idle limit is reached.
        """
        # self.connection is the client-side socket, soc the outbound one.
        iw = [self.connection, soc]
        ow = []
        count = 0
        while 1:
            count += 1
            (ins, _, exs) = select.select(iw, ow, iw, 3)
            if exs:
                print('error occr!')
                break
            for i in ins:
                if i is soc:
                    out = self.connection
                else:
                    out = soc
                data = i.recv(8192)
                if not data:
                    # Peer closed its side: the exchange is over.
                    return
                # sendall: a plain send() may transmit only part of the data.
                out.sendall(data)
                count = 0
            if count == max_idling:
                print('idling exit')
                break

    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT = do_GET
    do_DELETE = do_GET
459
class ThreadingHTTPServer(SocketServer.ThreadingMixIn,
                          BaseHTTPServer.HTTPServer):
    """HTTPServer combined with ThreadingMixIn (one thread per request)."""
    pass
462
463
464
465
def serving(HandlerClass,
            ServerClass, protocol="HTTP/1.0"):
    """Create the proxy server and serve requests forever.

    HandlerClass -- request handler class; its ``protocol_version`` is set
                    to *protocol*.
    ServerClass  -- server class, called as ``ServerClass(address, HandlerClass)``.
    protocol     -- HTTP protocol version string for the handler.

    Command line: ``sys.argv[1]`` must be 'www.sw2us.com', otherwise the
    process exits silently.  ``sys.argv[2]``, if given, is the listening
    port; otherwise the 'httpport' setting of the module-level ``confile``
    is used (default 8000).  The server binds to all interfaces.
    """
    if len(sys.argv) < 2 or sys.argv[1] != 'www.sw2us.com':
        sys.exit()

    if sys.argv[2:]:
        try:
            port = int(sys.argv[2])
        except ValueError:
            sys.exit("invalid port argument: %r" % (sys.argv[2],))
    else:
        port = confile.getPropertyValueAsInt('httpport', 8000)

    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    print("www.sw2us.com@2010 v.1.0.0")
    print("Serving HTTP on %s port %s " % (sa[0], sa[1]))
    # From here on all console output goes into the module-level ``buff``.
    # NOTE(review): ``buff`` is only created by the __main__ block, and as an
    # in-memory buffer it grows with every message printed -- confirm this is
    # intended for long-running servers.
    sys.stdout = buff
    sys.stderr = buff

    httpd.serve_forever()
492
493
494
if __name__ == '__main__':
    # Record this process's PID in 'proxy.pid' in the working directory.
    with open('proxy.pid', 'w') as f:
        f.write(str(os.getpid()))

    try:
        # 'allowed_clients' is a comma-separated list of host names or IPs.
        ss = confile.getPropertyValue('allowed_clients').strip()
        configured = [h.strip() for h in ss.split(',') if h.strip()]
        allowed = []
        for h in configured:
            try:
                allowed.append(socket.gethostbyname(h))
            except socket.error:
                # One bad entry must not silently prevent the server start.
                print("cannot resolve allowed client: %s" % h)
        # Restrict access whenever hosts were configured, even if none of
        # them resolved, so a resolution failure never opens the proxy to all.
        if configured:
            ProxyHandler.allowed_clients = allowed
        # Sink that serving() redirects stdout/stderr into.
        buff = StringIO.StringIO()

        serving(ProxyHandler, ThreadingHTTPServer)
    except KeyboardInterrupt:
        pass
    except Exception:
        # Report startup/runtime failures instead of exiting silently.
        traceback.print_exc()
519
原文链接: https://www.cnblogs.com/lexus/archive/2013/01/08/2851660.html
欢迎关注
微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍
原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/74881
非原创文章文中已经注明原地址,如有侵权,联系删除
关注公众号【高性能架构探索】,第一时间获取最新文章
转载文章受原作者版权保护。转载请注明原作者出处!