<?php
// Screen scraping your way into RSS
// Example script, by Dennis Pallett
// http://phpit.net/tutorials/screenscrap-rss
//
// Downloads the PHPit front page, cuts the individual content items out of
// the HTML with regular expressions and prints them as an RSS 2.0 feed.
//
// NOTE(review): this copy of the script had been damaged by a lossy text
// conversion: every space had become a question mark and everything that
// looked like a markup tag (the PHP open tags, the RSS elements, the HTML
// fragments inside the scraping patterns) had been removed. The RSS elements
// restored below follow from the namespaces declared on the root element and
// from the CDATA terminator that survived. The HTML fragments used in the
// scraping patterns (class="contentitem", <a href>, <p>, class="byline") are
// a best-effort reconstruction - TODO confirm them against the markup of the
// page that is actually being scraped.

/**
 * Returns the first capture group of $pattern within $subject, trimmed.
 *
 * @param string $pattern PCRE pattern containing at least one capture group
 * @param string $subject text to search
 * @return string|null the trimmed capture, or null when the pattern does not match
 */
function scrape_first_group($pattern, $subject)
{
    if (preg_match($pattern, $subject, $found) === 1 && isset($found[1])) {
        return trim($found[1]);
    }

    return null;
}

/**
 * Turns an HTML fragment into text that is safe inside an XML element.
 *
 * Tags are removed, HTML entities are decoded, and the XML special characters
 * are escaped again, so that HTML-only entities and bare ampersands from the
 * scraped page cannot make the feed malformed.
 *
 * @param string $html    scraped HTML fragment
 * @param string $charset character set the feed is published in
 * @return string escaped plain text
 */
function xml_text($html, $charset)
{
    $plain = html_entity_decode(strip_tags($html), ENT_QUOTES, $charset);

    return htmlspecialchars($plain, ENT_QUOTES, $charset);
}

// Character set announced in the HTTP header and in the XML declaration.
$charset = "ISO-8859-1";

// Get page
$url = "http://phpit.net/";
$data = file_get_contents($url);
if ($data === false) {
    // Without the page there is nothing to publish; report the failure
    // instead of sending an empty or broken feed.
    header("HTTP/1.1 502 Bad Gateway");
    header("Content-Type: text/plain; charset=" . $charset);
    exit("Could not fetch " . $url . "\n");
}

// Get content items.
// "[^`]*?" means "as few characters as possible, none of them a backtick";
// unlike "." it also matches across line breaks.
// NOTE(review): the opening <div class="contentitem"> is reconstructed - confirm.
preg_match_all("/<div class=\"contentitem\">([^`]*?)<\/div>/", $data, $matches);
$items = isset($matches[0]) ? $matches[0] : array();

// Begin feed
header("Content-Type: text/xml; charset=ISO-8859-1");
// Printed from PHP so the XML declaration is never mistaken for a PHP tag.
echo "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n";
?>
<rss version="2.0"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:admin="http://webns.net/mvcb/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <channel>
        <title>PHPit Latest Content</title>
        <description>The latest content from PHPit (http://phpit.net), screen scraped!</description>
        <link>http://phpit.net</link>
        <language>en-us</language>
<?php
// Loop through each content item
foreach ($items as $match) {
    // First, get title: the text of the link that closes the item's <h3>
    $title = scrape_first_group("/\">([^`]*?)<\/a><\/h3>/", $match);

    // Second, get url (site-relative; the host is prepended when printing)
    // NOTE(review): reconstructed pattern - takes the first href in the item.
    $link = scrape_first_group("/<a href=\"([^\"]*)\"/", $match);

    // Third, get text: everything between the first <p> and the byline
    // NOTE(review): reconstructed pattern - confirm.
    $text = scrape_first_group("/<p>([^`]*?)<span class=\"byline\">/", $match);

    // Fourth, and finally, get author
    // NOTE(review): the opening <span class="byline"> is reconstructed - confirm.
    $author = scrape_first_group("/<span class=\"byline\">By ([^`]*?)<\/span>/", $match);

    // An entry without a title or a link is useless to a feed reader.
    if ($title === null || $title === "" || $link === null || $link === "") {
        continue;
    }
    if ($text === null) {
        $text = "";
    }
    if ($author === null) {
        $author = "";
    }

    // Echo RSS XML
    echo "\t\t<item>\n";
    echo "\t\t\t<title>" . xml_text($title, $charset) . "</title>\n";
    echo "\t\t\t<link>" . xml_text("http://phpit.net" . $link, $charset) . "</link>\n";
    echo "\t\t\t<description>" . xml_text($text, $charset) . "</description>\n";
    echo "\t\t\t<content:encoded><![CDATA[ \n";
    // The HTML is passed through untouched; only a CDATA terminator inside it
    // is split across two sections so it cannot end the section early.
    echo str_replace("]]>", "]]]]><![CDATA[>", $text) . "\n";
    echo " ]]></content:encoded>\n";
    echo "\t\t\t<dc:creator>" . xml_text($author, $charset) . "</dc:creator>\n";
    echo "\t\t</item>\n";
}

// Close the elements opened in the template above.
echo "    </channel>\n";
echo "</rss>\n";
?>