Python开发Http代理服务器 – socketref,再见!高德 – C++博客

Python开发Http代理服务器 - socketref,再见!高德 - C++博客
Python开发Http代理服务器

这是之前开发酒店广告投放系统时编写的Http代理服务程序,功能是把广告插播进Html DOM结构内。一般被插入的是DIV,当然也包括script,这些内容都可以从数据库中动态获得。

简单修改之后即可当做Http代理服务器程序使用:在浏览器中把Http代理地址设置为运行该转发程序的机器的Ip即可。只要运行代理程序的机器能上网,客户机便能上网(其中涉及页面gzip压缩的处理有点麻烦)。

# -*- coding: utf-8 -*-
# HTTP proxy server
# 1. IP restriction, MAC restriction
#
# socketref@hotmail.com
# www.sw2us.com

# A bare string expression: a no-op for Python. Presumably left over from the
# sh-launcher idiom in which /bin/sh re-executes the file under `python -O`
# (that only works when the file starts with an sh shebang) -- TODO confirm.
"exec" "python" "-O" "$0" "$@"

__doc__ = """sw2us HTTP Proxy.

"""

__version__ = "0.2.1"

15

import BaseHTTPServer
import gzip
import httplib
import mimetools
import os
import re
import select
import socket
import SocketServer
import StringIO
import sys
import time
import traceback
import urlparse
import zlib

19

20

class ConfigProperty:
    """One ``key=value`` entry parsed from a single configuration line.

    Attributes:
        key:   property name, stripped of surrounding whitespace.
        value: property value as a string, stripped of surrounding whitespace.
    """

    def __init__(self, owner=None):
        # `owner` is accepted because callers pass the owning config object;
        # it is neither stored nor used here.
        self.key = ''
        self.value = ''

    def create(self, text):
        """Parse ``text`` (``key=value``) into this property.

        Everything from the first '#' onwards is treated as a comment and
        discarded. Only the first '=' separates key from value, so a value
        may itself contain '=' (e.g. a URL with a query string).

        Returns:
            True when a non-empty key was parsed; False when the line has no
            '=' or an empty key, in which case key/value are left unchanged.
        """
        comment_at = text.find('#')
        if comment_at != -1:
            text = text[:comment_at]
        if '=' not in text:
            return False
        key, value = text.split('=', 1)
        key = key.strip()
        if not key:
            return False
        self.key = key
        self.value = value.strip()
        return True

    def toString(self):
        """Return the property formatted as ``key=value`` ('' if formatting fails)."""
        try:
            return "%s=%s" % (self.key, self.value)
        except (TypeError, ValueError):
            # ValueError also covers the Unicode errors raised when byte and
            # unicode strings are mixed.
            return ''

    def toInt(self):
        """Return the value as an int, or 0 when it is not a valid integer."""
        try:
            return int(self.value)
        except (TypeError, ValueError):
            return 0

    def toFloat(self):
        """Return the value as a float, or 0.0 when it is not a valid number."""
        try:
            return float(self.value)
        except (TypeError, ValueError):
            return 0.0

66

67

# SimpleConfig
# Minimal configuration-file reader; each line has the form: key=value
class SimpleConfig:
    """Ordered collection of ``key=value`` properties loaded from a text file."""

    def __init__(self):
        self._file = ''
        self._props = []
        self._strip = True

    def open(self, file, strip=True):
        """Load properties from ``file``, replacing any loaded before.

        Lines that ConfigProperty.create() rejects are skipped.

        Args:
            file:  path of the configuration file.
            strip: recorded on the instance only; nothing in this class
                   reads it back.

        Returns:
            True on success, False when the file cannot be read.
        """
        self._strip = strip
        self._props = []
        try:
            with open(file, 'r') as fh:
                lines = fh.readlines()
        except (IOError, OSError, ValueError):
            # ValueError covers undecodable content on interpreters that
            # decode text files while reading.
            return False
        for text in lines:
            prop = ConfigProperty(self)
            if prop.create(text):
                self._props.append(prop)
        return True

    def toString(self):
        """Return every property as ``key=value``, one per line."""
        return ''.join(p.toString() + "\n" for p in self._props)

    def saveAs(self, file):
        """Write the configuration to ``file``.

        Returns:
            True on success; False (after printing a message) when the file
            cannot be written.
        """
        try:
            with open(file, 'w') as fh:
                fh.write(self.toString())
        except (IOError, OSError):
            print("write File Failed!")
            return False
        return True

    def getProperty(self, name):
        """Return the first property whose key equals ``name``, or None."""
        for p in self._props:
            if p.key == name:
                return p
        return None

    def getPropertyValue(self, key, default=''):
        """Return the string value stored under ``key``, or ``default``."""
        prop = self.getProperty(key)
        if prop is None:
            return default
        return prop.value

    def getPropertyValueAsInt(self, name, default=0):
        """Return the value of ``name`` as an int.

        ``default`` is returned when the property is missing, empty, or not
        a valid integer.
        """
        raw = self.getPropertyValue(name)
        if not raw:
            return default
        try:
            return int(raw)
        except (TypeError, ValueError):
            return default

    def getPropertyValueAsFloat(self, name, default=0.0):
        """Return the value of ``name`` as a float.

        ``default`` is returned when the property is missing, empty, or not
        a valid number.
        """
        raw = self.getPropertyValue(name)
        if not raw:
            return default
        try:
            return float(raw)
        except (TypeError, ValueError):
            return default

151

152

153#===========================================#

154

155

156#===========================================#

157

# One `arp -a` row: dotted IPv4 address, whitespace, then a dash-separated
# six-group hexadecimal MAC address. The dots are escaped so that only a
# literal '.' separates the octets.
_ARP_ENTRY_RE = re.compile(
    r'^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+'
    r'([0-9a-fA-F]{1,2}(?:-[0-9a-fA-F]{1,2}){5})'
)


def getMacList(lines=None):
    """Return the (ip, mac) pairs found in ARP table output.

    Args:
        lines: optional iterable of text lines to parse. When omitted, the
               output of the ``arp -a`` command is read instead.

    Returns:
        A list of ``(ip, mac)`` string tuples, one per line that starts with
        an IPv4 address followed by a dash-separated MAC address. All other
        lines (headers, blanks) are ignored.
    """
    if lines is None:
        pipe = os.popen('arp -a', 'r')
        try:
            lines = pipe.readlines()
        finally:
            # Always release the pipe, even if reading fails.
            pipe.close()

    maclist = []
    for line in lines:
        found = _ARP_ENTRY_RE.match(line.strip())
        if found:
            maclist.append(found.groups())
    return maclist

173

174

175

176##########################################

# Module-wide configuration, loaded once at import time from 'proxy.conf'
# (a relative path). The result of open() is not checked here; lookups on
# `confile` return their supplied defaults for any property that was not loaded.
confile = SimpleConfig()
confile.open('proxy.conf')
# Placeholder for a database connection; never assigned anything else in the
# visible code.
dbconn = None

180

181##########################################

182#初始化系统配置

def initConfiguration():
    """Initialise the system configuration.

    Currently a placeholder: it performs no work and always returns True.
    """
    return True

187

188##########################################

189

class ProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler implementing a simple forwarding HTTP proxy.

    GET/HEAD/POST/PUT/DELETE requests are re-issued to the origin server and
    the raw reply is relayed back; CONNECT opens a raw two-way tunnel.

    If the class (or an instance) has an ``allowed_clients`` attribute, only
    client IP addresses contained in it are served; every other client
    receives a 403 response.
    """

    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle
    server_version = "TinyHTTPProxy/" + __version__
    rbufsize = 0  # self.rfile is unbuffered

    # handle() runs in its own thread
    def handle(self):
        """Entry point for a new connection: enforce the client allow-list."""
        print('client incoming...')
        ip = self.client_address[0]
        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
            # Read and parse the request so that a well-formed 403 can be
            # sent; parse_request() reports malformed requests itself.
            self.raw_requestline = self.rfile.readline()
            if self.parse_request():
                self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        """Connect ``soc`` to ``netloc`` (``host`` or ``host:port``, default port 80).

        Returns:
            1 on success; 0 after an error response has been sent to the
            client (400 for a non-numeric port, 404 when connecting fails).
        """
        colon = netloc.find(':')
        if colon >= 0:
            try:
                host_port = netloc[:colon], int(netloc[colon + 1:])
            except ValueError:
                self.send_error(400, "bad port in %s" % netloc)
                return 0
        else:
            host_port = netloc, 80
        try:
            soc.connect(host_port)
        except socket.error as err:
            # Prefer the textual part of (errno, message) when it exists.
            try:
                msg = err.args[1]
            except IndexError:
                msg = str(err)
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        """Handle CONNECT: open a raw tunnel to the host named in the request path."""
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                self._read_write(soc, 300)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def do_GET(self):
        """Forward the request to the origin server and relay the reply.

        The origin host comes from the request URL, falling back to the Host
        header. Only the ``http`` scheme is accepted; a URL with a fragment
        or without a resolvable host is answered with 400.
        """
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if not netloc:
            netloc = self.headers.get('Host', "")
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                # An absolute URL without a path (http://host) must still be
                # requested as "/" to form a valid request line.
                target = urlparse.urlunparse(
                    ('', '', path or '/', params, query, ''))
                head = ["%s %s %s\r\n" % (
                    self.command, target, self.request_version)]
                self.headers['Connection'] = 'close'
                del self.headers['Proxy-Connection']
                for key_val in self.headers.items():
                    head.append("%s: %s\r\n" % key_val)
                head.append("\r\n")
                # sendall() so that a partial send cannot truncate the request.
                soc.sendall(''.join(head))
                # Request line and headers are sent; relay everything else.
                self._read_write(soc)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def insertTags(self, tag, body, insert):
        """Return ``body`` with ``insert`` placed right after the first ``<tag ...>``.

        The search is a case-sensitive match on the prefix ``'<' + tag``.
        ``body`` is returned unchanged when the tag, or its closing '>', is
        not found.
        """
        tag_at = body.find('<%s' % tag)
        if tag_at != -1:
            close_at = body.find('>', tag_at)
            if close_at != -1:
                print('-' * 20)
                body = body[:close_at + 1] + insert + body[close_at + 1:]
        return body

    def _dechunk(self, body):
        """Join the payload of an HTTP chunked-transfer body into one string.

        Raises:
            ValueError: when a chunk-size line is missing or not hexadecimal.
        """
        pieces = []
        pos = 0
        while pos < len(body):
            eol = body.find('\r\n', pos)
            if eol == -1:
                raise ValueError('truncated chunk header')
            # Ignore any ";extension" that follows the hexadecimal size.
            size = int(body[pos:eol].split(';', 1)[0], 16)
            if size == 0:
                break
            start = eol + 2
            pieces.append(body[start:start + size])
            pos = start + size + 2  # skip the CRLF that ends the chunk
        return ''.join(pieces)

    # Responses such as Google pages arrive gzip-compressed
    # (content-encoding: gzip), so the text cannot be filtered until it has
    # been decompressed. Gzip data either follows the headers directly or is
    # stored in 'transfer-encoding: chunked' blocks. Everything is restored
    # to plain data here, and Content-Length is recomputed before the reply
    # goes back to the client browser.
    def sendBackResponse(self, command, headers, body):
        """Send a reply to the client, decompressing and rewriting the body.

        Args:
            command: the status line, without the trailing CRLF.
            headers: mapping of header names to values; it is modified in
                     place. Presumably keyed by lower-case names -- TODO
                     confirm against the caller, which is not visible here.
            body:    the raw response body.

        NOTE(review): a chunked body that is not gzip-encoded is forwarded
        still chunk-framed together with a Content-Length header, as before;
        confirm whether such bodies should be de-chunked too.
        """
        if headers.get('content-encoding', '').strip().lower() == 'gzip':
            chunked = (headers.get('transfer-encoding', '').strip().lower()
                       == 'chunked')
            try:
                raw = body
                if chunked:
                    raw = self._dechunk(body)
                plain = gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
            except Exception:
                # Best effort: forward the body untouched, and keep the
                # encoding headers so that they still describe it.
                traceback.print_exc()
                print('decompress failed!')
            else:
                body = plain
                del headers['content-encoding']
                if chunked:
                    del headers['transfer-encoding']

        # Insert the configured advertisement. The hook is not defined in
        # this class, so it is looked up dynamically and skipped when absent.
        publish = getattr(self, 'publish_advertisement', None)
        if publish is not None:
            try:
                body = publish(body)
            except Exception:
                traceback.print_exc()

        # Drop any stale length header, whatever its capitalisation, so that
        # exactly one Content-Length is sent.
        for name in list(headers.keys()):
            if name.lower() == 'content-length':
                del headers[name]
        headers['Content-Length'] = str(len(body))

        head = [command, '\r\n']
        for k, v in headers.items():
            head.append("%s: %s\r\n" % (k, v))
        head.append("\r\n")
        self.connection.sendall(''.join(head))
        self.connection.sendall(body)

    def _read_write(self, soc, max_idling=20):
        """Relay data in both directions between the client and ``soc``.

        Returns when either side closes its connection, when select()
        reports an exceptional condition, or after ``max_idling`` consecutive
        3-second polls without data (20 * 3 = 60 seconds by default).
        """
        # self.connection is the internal client, soc the outbound connection.
        iw = [self.connection, soc]
        ow = []
        count = 0
        while 1:
            count += 1
            (ins, _, exs) = select.select(iw, ow, iw, 3)
            if exs:
                print('error occr!')
                break
            for ready in ins:
                if ready is soc:
                    out = self.connection
                else:
                    out = soc
                data = ready.recv(8192)
                if not data:
                    # The peer closed the connection: stop relaying.
                    return
                # sendall() so that a partial send cannot drop relayed data.
                out.sendall(data)
                count = 0
            if count == max_idling:
                print('idling exit...')
                break

    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT = do_GET
    do_DELETE = do_GET

459

class ThreadingHTTPServer(SocketServer.ThreadingMixIn,
                          BaseHTTPServer.HTTPServer):
    """HTTPServer combined with ThreadingMixIn; adds no behaviour of its own."""

462

463

464

465

def serving(HandlerClass,
            ServerClass, protocol="HTTP/1.0"):
    """Start the proxy server and serve requests until the process ends.

    Command line:
        argv[1] must be the literal 'www.sw2us.com'; otherwise the process
                exits immediately and silently.
        argv[2] optional listening port; when absent the 'httpport'
                configuration property is used (default 8000).

    Args:
        HandlerClass: request handler class; its ``protocol_version`` is set
                      to ``protocol``.
        ServerClass:  server class, called as
                      ``ServerClass(address, HandlerClass)``.
        protocol:     HTTP protocol version string for the handler.
    """
    if len(sys.argv) < 2 or sys.argv[1] != 'www.sw2us.com':
        sys.exit()

    if sys.argv[2:]:
        try:
            port = int(sys.argv[2])
        except ValueError:
            sys.exit("invalid port: %r" % sys.argv[2])
    else:
        port = confile.getPropertyValueAsInt('httpport', 8000)

    server_address = ('', port)  # '' binds every local interface

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    print("www.sw2us.com@2010 v.1.0.0")
    print("Serving HTTP on %s port %s ..." % (sa[0], sa[1]))

    # From here on, output goes to the module-level `buff` object when the
    # launcher has created one; otherwise stdout/stderr are left untouched.
    # NOTE(review): if `buff` is an in-memory buffer it grows for as long as
    # the server runs -- consider a bounded sink.
    sink = globals().get('buff')
    if sink is not None:
        sys.stdout = sink
        sys.stderr = sink

    httpd.serve_forever()

492

493

494

if __name__ == '__main__':
    # Record the process id in 'proxy.pid' (written to the working directory).
    with open('proxy.pid', 'w') as pid_file:
        pid_file.write(str(os.getpid()))

    try:
        # Resolve the comma-separated 'allowed_clients' property to IP
        # addresses. A host that cannot be resolved aborts start-up (reported
        # below) rather than silently widening or narrowing the allow-list.
        allowed = []
        ss = confile.getPropertyValue('allowed_clients').strip()
        for h in ss.split(','):
            h = h.strip()
            if h:
                allowed.append(socket.gethostbyname(h))
        if allowed:
            ProxyHandler.allowed_clients = allowed

        # In-memory sink that serving() switches stdout/stderr to.
        buff = StringIO.StringIO()

        serving(ProxyHandler, ThreadingHTTPServer)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server; exit quietly.
        pass
    except Exception:
        # Report on the real stderr: sys.stderr may already point at `buff`.
        traceback.print_exc(file=sys.__stderr__)

519

原文链接: https://www.cnblogs.com/lexus/archive/2013/01/08/2851660.html

欢迎关注

微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍

原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/74881

非原创文章文中已经注明原地址,如有侵权,联系删除

关注公众号【高性能架构探索】,第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处!

(0)
上一篇 2023年2月9日 下午4:37
下一篇 2023年2月9日 下午4:38

相关推荐