起步于308的小菜菜

no money no dream coder 一枚

Python天气预报

利用 Python 抓取网页版的天气预报并写入一个文件,再用 shell 提取今明两天的预报作为飞信(Fetion)短信的发送内容,最后把脚本加入 crontab 定时运行(shell 脚本在此从略)。代码如下:

(tianqi.py) download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
#coding:utf-8
'''/*=============================================================================
#
# Author: mengdaiku - mengdaikun@gmail.com
#
# QQ : 646978600
#
# Last modified: 2013-05-20 16:11
#
# Filename: tianqi.py
#
# Description: 网络抓取网页天气预报并利用split整合发短信通知(发信脚本略) 
#
=============================================================================*/'''


import sys, re, urllib2, datetime
from HTMLParser import HTMLParser

class main(HTMLParser):
    """Collect forecast text per day from an HTML weather table.

    The parser looks for a ``<table>`` start tag carrying exactly seven
    attributes.  While inside that table, every ``<td>`` start tag advances a
    day counter, and text that follows an attribute-less ``<td>``, ``<th>``
    or ``<br>`` tag is stored under the date ``today + counter``.

    ``result`` maps the seven ``datetime.date`` objects starting at today to
    the list of stripped text fragments collected for that day.
    """

    def __init__(self):
        # Call the base initializer directly rather than via super():
        # on Python 2, HTMLParser is an old-style class.
        HTMLParser.__init__(self)
        # Tags whose following text is collected.
        self.tarsum = ['td', 'th', 'br']
        # 1 while the parser is inside the forecast table, 0 otherwise.
        self.readingtitle = 0
        # Name and attribute count of the most recent start tag.
        self.curtag = None
        self.curattrs = 0
        # Tag name that opened the forecast table (set once it is found).
        self.marktag = None
        self.result = {}
        # Day offset (relative to the base date) of the cell being read.
        self.timeclock = 0
        # Take a single date snapshot so that the keys created here and the
        # keys computed in handle_data agree even if the run crosses midnight.
        self._base_date = datetime.date.today()
        for i in range(0, 7):
            self.result[self._base_date + datetime.timedelta(days=i)] = []

    def handle_starttag(self, tag, attrs):
        """Track the current tag and advance the day counter on ``<td>``.

        :param tag: lower-cased tag name supplied by HTMLParser.
        :param attrs: list of ``(name, value)`` attribute pairs.
        """
        self.curtag = tag
        self.curattrs = len(attrs)
        # The forecast table is recognised purely by its attribute count.
        if tag == 'table' and len(attrs) == 7:
            self.readingtitle = 1
            self.marktag = tag
        if self.readingtitle == 1:
            if tag == self.marktag:
                # Restart the counter below zero; presumably the first two
                # <td> cells of the table are not day columns -- verify
                # against the page markup.
                self.timeclock = -2
            elif tag == 'td':
                self.timeclock += 1

    def handle_data(self, data):
        """Store ``data`` (stripped) under the day the counter points at.

        Text is only kept while inside the forecast table and when the most
        recent start tag is an attribute-less ``td``/``th``/``br``.  Text
        whose computed day lies outside the seven pre-seeded days is ignored
        instead of raising ``KeyError``.

        :param data: text content reported by HTMLParser.
        """
        if self.readingtitle != 1:
            return
        if self.curtag not in self.tarsum or self.curattrs != 0:
            return
        day = self._base_date + datetime.timedelta(days=self.timeclock)
        bucket = self.result.get(day)
        if bucket is not None:
            bucket.append(data.strip())

    def handle_endtag(self, tag):
        """Stop collecting once the forecast table's end tag is seen.

        :param tag: lower-cased tag name supplied by HTMLParser.
        """
        if tag == self.marktag:
            self.readingtitle = 0

    def getvalue(self):
        """Return the ``{date: [text, ...]}`` mapping collected so far."""
        return self.result

if __name__ == '__main__':
    # Download the Hangzhou forecast page and print one line per day in the
    # form "<date> <text> <text> ...".
    url = urllib2.urlopen('http://qq.ip138.com/weather/zhejiang/HangZhou.htm')
    try:
        page = url.read()
    finally:
        # Release the HTTP connection even if the read fails.
        url.close()
    # Decode the bytes as GB2312 and re-encode as UTF-8; bytes that cannot be
    # converted are dropped ('ignore').
    page = unicode(page, 'gb2312', 'ignore').encode('utf-8', 'ignore')
    line = main()
    line.feed(page)
    # getvalue is a method and has to be called; reading .items off the bound
    # method raised AttributeError.  Sorting makes the days print in
    # chronological order instead of arbitrary dict order.
    for key, value in sorted(line.getvalue().items()):
        # Same text the chained "print x," statements produced: the date and
        # every fragment separated by single spaces, then a newline.
        print(' '.join([str(key)] + value))

Comments