-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrss_parser.php
More file actions
114 lines (110 loc) · 5.31 KB
/
rss_parser.php
File metadata and controls
114 lines (110 loc) · 5.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
<?
include('parser.php');
/**
 * Serialise the children of a DOM node into one HTML string (an "innerHTML").
 *
 * Each child node is serialised with DOMDocument::saveHTML() and the pieces
 * are concatenated in document order.
 *
 * @param DOMNode $element Node whose child nodes are serialised. May be a
 *                         DOMDocument itself.
 * @return string Concatenated markup of all children; an empty string when
 *                the node has no children.
 */
function DOMinnerHTML(DOMNode $element)
{
    // A DOMDocument has no owner document (the property is null), so the
    // document itself has to do the serialising in that case.
    $document = ($element instanceof DOMDocument) ? $element : $element->ownerDocument;

    $innerHTML = "";
    foreach ($element->childNodes as $child) {
        $innerHTML .= $document->saveHTML($child);
    }
    return $innerHTML;
}
// RSS feeds polled by the crawler below. The feed URL is also used as the
// `rss_referral` key when looking up previously imported news.
$rss_list = [
    // Apple Daily
    'http://www.appledaily.com.tw/rss/newcreate/kind/rnews/type/new',
    // Yahoo News Taiwan, one feed per section
    'http://tw.news.yahoo.com/rss/entertainment',
    'http://tw.news.yahoo.com/rss/politics',
    'http://tw.news.yahoo.com/rss/world',
    'http://tw.news.yahoo.com/rss/local/',
    'http://tw.news.yahoo.com/rss/health',
    'http://tw.news.yahoo.com/sentiment/informative/rss',
    'http://tw.news.yahoo.com/rss/sports',
    'http://tw.news.yahoo.com/rss/society',
    'http://tw.news.yahoo.com/rss/finance',
    'http://tw.news.yahoo.com/rss/lifestyle',
    'http://tw.news.yahoo.com/rss/art-edu',
    'http://tw.news.yahoo.com/rss/technology',
];
// Crawler entry point: the page is only rendered (and the feeds only crawled)
// when the requesting address equals the server's own address. Any other
// client receives no output from this block.
if($_SERVER['REMOTE_ADDR'] == $_SERVER['SERVER_ADDR']){
?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Crawler Log</title>
</head>
<body>
<?php
	echo "<h1>News Crawler</h1>\n";
	foreach ($rss_list as $rss_referral) {
		// Newest publish_time already stored for this feed. A failed query or an
		// empty table yields 0, so every item in the feed counts as new.
		// NOTE(review): mysqli_query_new() is defined elsewhere; presumably it
		// escapes the %s argument before building the query - confirm.
		$result = mysqli_query_new(Parser::$mysqli_link, "SELECT MAX(`publish_time`) as `max_date` FROM `news` WHERE `rss_referral` = '%s'", $rss_referral);
		$result_array = $result ? mysqli_fetch_array($result) : null;
		$max_date = isset($result_array['max_date']) ? $result_array['max_date'] : '';
		$db_date = intval(strtotime($max_date));

		printf(
			"<ol><li><a href='%s' target='_blank'>%s</a> - after %s</li>\n",
			htmlspecialchars($rss_referral, ENT_QUOTES, 'UTF-8'),
			htmlspecialchars($rss_referral, ENT_QUOTES, 'UTF-8'),
			date("Y-m-d H:i:s", $db_date)
		);

		// Download and parse the feed. loadXML() is an instance method (a static
		// call is an Error on PHP 8) and rejects an empty source, so both the
		// download result and the parse result are checked. Parse warnings are
		// collected by libxml instead of being silenced with "@".
		$doc = false;
		$xml = file_get_contents($rss_referral);
		if ($xml !== false && $xml !== '') {
			$previous_libxml_setting = libxml_use_internal_errors(true);
			$feed_document = new DOMDocument();
			if ($feed_document->loadXML($xml)) {
				$doc = $feed_document;
			}
			libxml_clear_errors();
			libxml_use_internal_errors($previous_libxml_setting);
		}

		if ($doc) {
			$items = $doc->getElementsByTagName('item');
			foreach ($items as $item) {
				// Skip items without a usable date and items that are not newer
				// than what is already stored for this feed.
				$pub_node = $item->getElementsByTagName('pubDate')->item(0);
				$pub_date = $pub_node ? strtotime($pub_node->nodeValue) : false;
				if ($pub_date === false || $pub_date <= $db_date) {
					continue;
				}

				$title_node = $item->getElementsByTagName('title')->item(0);
				$title = $title_node ? $title_node->nodeValue : '';

				// Reset per item so a parser left over from a previous item can
				// never be imported again by accident.
				$parse_item = null;
				$url = '';

				if (strpos($rss_referral, 'www.appledaily.com.tw') !== false) {
					// Apple Daily items carry the article URL in <link>.
					$link_node = $item->getElementsByTagName('link')->item(0);
					if ($link_node) {
						$url = $link_node->nodeValue;
						$parse_item = new AppleParser($url, $rss_referral);
					}
				}
				if (strpos($rss_referral, 'tw.news.yahoo.com') !== false) {
					// Yahoo items only carry the article slug in <guid>; build the
					// article URL from it and make sure it ends in ".html".
					$guid_node = $item->getElementsByTagName('guid')->item(0);
					if ($guid_node) {
						$url = "http://tw.news.yahoo.com/".$guid_node->nodeValue;
						if (substr($url, -5) != '.html') {
							$url .= ".html";
						}
						$parse_item = new YahooParser($url, $rss_referral);
					}
				}

				// Title and URL come from the remote feed, so escape them before
				// writing them into the log page.
				$safe_url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
				$safe_title = htmlspecialchars($title, ENT_QUOTES, 'UTF-8');

				// A parser that produced a title is treated as a successful fetch.
				if ($parse_item && $parse_item->title) {
					$parse_item->toDB();
					printf("<li><a href='%s' target='_blank'>%s</a> 完成</li>\n", $safe_url, $safe_title);
				} else {
					printf("<li>匯入失敗 <a href='%s' target='_blank'>%s</a></li>\n", $safe_url, $safe_title);
				}
			}
		}
		printf("</ol>\n");
	}
	include('crawler.peopo.php');
	printf("現在時間:%s", date("Y-m-d H:i:s"));
?>
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"></script>
<script>
$('li:only-child').map(function(){
$(this.parentNode).appendTo('body');
});
setTimeout(function(){
location.reload();
}, 1800000);
</script>
</body>
</html>
<?php
}
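/*
Reference sample, kept for documentation only: this appears to be a single <item>
from a tw.news.yahoo.com RSS feed. Note that its <guid> value has no ".html" suffix.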
<title>野柳自然中心 新環教場域啟用
</title>
<description><p><a href="http://tw.news.yahoo.com/%E9%87%8E%E6%9F%B3%E8%87%AA%E7%84%B6%E4%B8%AD%E5%BF%83-%E6%96%B0%E7%92%B0%E6%95%99%E5%A0%B4%E5%9F%9F%E5%95%9F%E7%94%A8-040353121.html"><img src="http://l1.yimg.com/bt/api/res/1.2/0j0xCXzwuhMtGdH3cz9Ysw--/YXBwaWQ9eW5ld3M7Zmk9ZmlsbDtoPTg2O3E9NzU7dz0xMzA-/http://media.zenfs.com/en_us/News/travelrich/Info_NewsPic15085b1.jpg" width="130" height="86" alt="野柳自然中心 新環教場域啟用." align="left" title="野柳自然中心 新環教場域啟用." border="0" /></a>野柳方榮獲行政院農委會林務局及台大地理系合辦的十大地景民眾票選及專家學者評分第一名!野柳在自然環教中心於101年11月 ...</p><br clear="all"/>
</description>
<link>
<pubDate>Tue, 29 Oct 2013 12:03:53 +0800
</pubDate>
<source url="http://www.travelrich.com.tw/">旅遊經
</source>
<guid isPermaLink="false">%E9%87%8E%E6%9F%B3%E8%87%AA%E7%84%B6%E4%B8%AD%E5%BF%83-%E6%96%B0%E7%92%B0%E6%95%99%E5%A0%B4%E5%9F%9F%E5%95%9F%E7%94%A8-040353121
</guid>
<media:content url="http://l1.yimg.com/bt/api/res/1.2/0j0xCXzwuhMtGdH3cz9Ysw--/YXBwaWQ9eW5ld3M7Zmk9ZmlsbDtoPTg2O3E9NzU7dz0xMzA-/http://media.zenfs.com/en_us/News/travelrich/Info_NewsPic15085b1.jpg" type="image/jpeg" width="130" height="86">
</media:content>
<media:text type="html"><p><a href="http://tw.news.yahoo.com/%E9%87%8E%E6%9F%B3%E8%87%AA%E7%84%B6%E4%B8%AD%E5%BF%83-%E6%96%B0%E7%92%B0%E6%95%99%E5%A0%B4%E5%9F%9F%E5%95%9F%E7%94%A8-040353121.html"><img src="http://l1.yimg.com/bt/api/res/1.2/0j0xCXzwuhMtGdH3cz9Ysw--/YXBwaWQ9eW5ld3M7Zmk9ZmlsbDtoPTg2O3E9NzU7dz0xMzA-/http://media.zenfs.com/en_us/News/travelrich/Info_NewsPic15085b1.jpg" width="130" height="86" alt="野柳自然中心 新環教場域啟用." align="left" title="野柳自然中心 新環教場域啟用." border="0" /></a>野柳方榮獲行政院農委會林務局及台大地理系合辦的十大地景民眾票選及專家學者評分第一名!野柳在自然環教中心於101年11月 ...</p><br clear="all"/>
</media:text>
<media:credit role="publishing company">
</media:credit>
*/
?>