Revision: 69126
Updated Code
at April 23, 2015 07:05 by Firsh
Updated Code
/**
 * Scrape a YouTube channel or playlist page and return its videos as feed-like items.
 *
 * Dispatches on the shape of $rss_url: legacy gdata feed URLs and
 * youtube.com/user/ URLs go to the channel scraper, URLs carrying a
 * list= parameter go to the playlist scraper.
 *
 * @param string $rss_url Channel URL, legacy gdata feed URL or playlist URL.
 * @param mixed  $limit   Maximum number of items. 0, negative or non-numeric values mean "no limit".
 * @return array|string   List of JIGstdClass items, or a translated error message.
 */
function scrape_youtube($rss_url, $limit){
    // -1 can never equal the running item count, so it disables the cap.
    // Numeric strings are normalised as well: "0" would otherwise slip past a
    // strict comparison with 0 and then stop the scrapers before the first item.
    $limit = (is_numeric($limit) && (int) $limit > 0) ? (int) $limit : -1;

    if(stripos($rss_url, 'gdata.youtube.com') !== false || stripos($rss_url, 'youtube.com/user/') !== false){
        return $this->scrape_youtube_channel($rss_url, $limit);
    }
    if(stripos($rss_url, 'list=') !== false){
        return $this->scrape_youtube_playlist($rss_url, $limit);
    }
    return __('YouTube source could not be determined.', 'jig_td');
}

/**
 * Scrape the videos of a playlist from its public playlist page.
 *
 * Each returned item carries get_title, get_description (a "by <owner>" link
 * when the owner is present in the markup), get_date (always the
 * "No date available." message), get_enclosures[0]->get_link (thumbnail URL)
 * and get_permalink.
 *
 * @param string $rss_url Any URL containing a list=<playlist id> parameter.
 * @param int    $limit   Stop after this many items; -1 never matches and so means "all".
 * @return array|string   List of JIGstdClass items (possibly empty), or a translated error message.
 */
function scrape_youtube_playlist($rss_url, $limit){
    // "+" instead of "*": an empty ID ("...?list=") is treated as undeterminable.
    if (!preg_match('/(?<=list=)[^&#?\s]+/i', $rss_url, $regs)) {
        return __('YouTube playlist ID could not be determined.', 'jig_td');
    }
    $url = 'https://www.youtube.com/playlist?list='.rawurlencode($regs[0]).'&hl=en';
    $host = is_ssl() ? 'https://www.youtube.com' : 'http://www.youtube.com';
    $rss_items = array();

    // NOTE(review): file_get_contents_curl() is defined elsewhere; presumably it
    // returns the response body, or a falsy value on failure - confirm.
    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // loadHTML() rejects empty input (ValueError on PHP 8), so bail out early.
        return $rss_items;
    }

    // Turn non-ASCII characters into numeric entities so libxml does not misread
    // UTF-8. Replaces mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated in PHP 8.2.
    $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

    $doc = new DOMDocument();
    // Collect markup warnings internally instead of silencing everything with @.
    $previous_libxml = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml);
    if (!$loaded) {
        return $rss_items;
    }

    $xpath = new DOMXpath($doc);
    $videos = $xpath->query('//tr[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if($count == $limit){
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // A row without a watch link cannot become an item; skip it instead of crashing.
            continue;
        }

        $title = trim($anchor->nodeValue);
        if($title == "[Private Video]" || $title == "[Deleted Video]"){
            // Placeholder rows are skipped and do not count towards the limit.
            continue;
        }

        $description = '';
        $ownerAnchor = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " pl-video-owner ")]/a', $video)->item(0);
        if ($ownerAnchor instanceof DOMElement) {
            // The owner name and href come from the scraped page: escape them before
            // embedding them in HTML. The " \n" separator matches the original output.
            $description = __('by','jig_td')." \n"
                .'<a href="'.htmlspecialchars($host.$ownerAnchor->getAttribute('href'), ENT_QUOTES, 'UTF-8').'" target="_blank">'
                .htmlspecialchars(trim($ownerAnchor->nodeValue), ENT_QUOTES, 'UTF-8').'</a>';
        }

        $thumbnail = '';
        $image = $xpath->query('.//img', $video)->item(0);
        if ($image instanceof DOMElement) {
            $thumbnail = $image->getAttribute('data-thumb');
            if ($thumbnail === '') {
                // No data-thumb attribute on this image: fall back to its src.
                $thumbnail = $image->getAttribute('src');
            }
        }

        $rss_item = new JIGstdClass();
        $rss_item->get_title = $title;
        $rss_item->get_description = $description;
        $rss_item->get_date = __("No date available.","jig_td");
        $rss_item->get_enclosures = array();
        $rss_item->get_enclosures[] = new JIGstdClass();
        $rss_item->get_enclosures[0]->get_link = str_replace('/default.jpg', '/maxresdefault.jpg', $thumbnail);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}

/**
 * Scrape the uploads of a user channel from its public "videos" page.
 *
 * Accepts legacy feed URLs such as
 * http://gdata.youtube.com/feeds/base/users/MAKO0MAKO0/uploads?max-results=50
 * as well as youtube.com/user/<name> URLs.
 *
 * Each returned item carries get_title, get_description, get_date,
 * get_enclosures[0]->get_link (thumbnail URL) and get_permalink.
 *
 * @param string $rss_url Legacy gdata feed URL or youtube.com/user/ URL.
 * @param int    $limit   Stop after this many items; -1 never matches and so means "all".
 * @return array|string   List of JIGstdClass items (possibly empty), or a translated error message.
 */
function scrape_youtube_channel($rss_url, $limit){
    // The username ends at the next "/", "?", "#" or whitespace, so a trailing
    // query string or extra path segments are not swallowed into the name.
    if (!preg_match('%(?<=/feeds/base/users/)[^/?#\s]+%i', $rss_url, $regs)
        && !preg_match('%(?<=youtube\.com/user/)[^/?#\s]+%i', $rss_url, $regs)) {
        return __('YouTube username could not be determined.', 'jig_td');
    }
    $url = 'https://www.youtube.com/user/'.rawurlencode($regs[0]).'/videos?flow=list&sort=dd&hl=en';
    $host = is_ssl() ? 'https://www.youtube.com' : 'http://www.youtube.com';
    $rss_items = array();

    // NOTE(review): file_get_contents_curl() is defined elsewhere; presumably it
    // returns the response body, or a falsy value on failure - confirm.
    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // loadHTML() rejects empty input (ValueError on PHP 8), so bail out early.
        return $rss_items;
    }

    // Turn non-ASCII characters into numeric entities so libxml does not misread
    // UTF-8. Replaces mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated in PHP 8.2.
    $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

    $doc = new DOMDocument();
    // Collect markup warnings internally instead of silencing everything with @.
    $previous_libxml = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml);
    if (!$loaded) {
        return $rss_items;
    }

    $xpath = new DOMXpath($doc);
    $videos = $xpath->query('//li[contains(concat(" ", normalize-space(@class), " "), " feed-item-container ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if($count == $limit){
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // A container without a watch link cannot become an item; skip it instead of crashing.
            continue;
        }

        // Description and date are optional in the markup; a missing node must not be dereferenced.
        $descriptionNode = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-description ")]', $video)->item(0);
        $dateNode = $xpath->query('.//ul[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-meta-info ")]/*[1]', $video)->item(0);

        $thumbnail = '';
        $image = $xpath->query('.//img', $video)->item(0);
        if ($image instanceof DOMElement) {
            $thumbnail = $image->getAttribute('data-thumb');
            if ($thumbnail === '') {
                // No data-thumb attribute on this image: fall back to its src.
                $thumbnail = $image->getAttribute('src');
            }
        }

        $rss_item = new JIGstdClass();
        $rss_item->get_title = trim($anchor->getAttribute('title'));
        $rss_item->get_description = ($descriptionNode !== null) ? trim($descriptionNode->nodeValue) : '';
        // Same fallback message the playlist scraper uses for its date field.
        $rss_item->get_date = ($dateNode !== null) ? $dateNode->nodeValue : __("No date available.","jig_td");
        $rss_item->get_enclosures = array();
        $rss_item->get_enclosures[] = new JIGstdClass();
        $rss_item->get_enclosures[0]->get_link = str_replace('/mqdefault.jpg', '/maxresdefault.jpg', $thumbnail);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}
Revision: 69125
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at April 23, 2015 07:04 by Firsh
Initial Code
/**
 * Scrape a YouTube channel or playlist page and return its videos as feed-like items.
 *
 * Dispatches on the shape of $rss_url: legacy gdata feed URLs and
 * youtube.com/user/ URLs go to the channel scraper, URLs carrying a
 * list= parameter go to the playlist scraper.
 *
 * @param string $rss_url Channel URL, legacy gdata feed URL or playlist URL.
 * @param mixed  $limit   Maximum number of items. 0, negative or non-numeric values mean "no limit".
 * @return array|string   List of JIGstdClass items, or a translated error message.
 */
function scrape_youtube($rss_url, $limit){
    // -1 can never equal the running item count, so it disables the cap.
    // Numeric strings are normalised as well: "0" would otherwise slip past a
    // strict comparison with 0 and then stop the scrapers before the first item.
    $limit = (is_numeric($limit) && (int) $limit > 0) ? (int) $limit : -1;

    if(stripos($rss_url, 'gdata.youtube.com') !== false || stripos($rss_url, 'youtube.com/user/') !== false){
        return $this->scrape_youtube_channel($rss_url, $limit);
    }
    if(stripos($rss_url, 'list=') !== false){
        return $this->scrape_youtube_playlist($rss_url, $limit);
    }
    return __('YouTube source could not be determined.', 'jig_td');
}

/**
 * Scrape the videos of a playlist from its public playlist page.
 *
 * Each returned item carries get_title, get_description (a "by <owner>" link
 * when the owner is present in the markup), get_date (always the
 * "No date available." message), get_enclosures[0]->get_link (thumbnail URL)
 * and get_permalink.
 *
 * @param string $rss_url Any URL containing a list=<playlist id> parameter.
 * @param int    $limit   Stop after this many items; -1 never matches and so means "all".
 * @return array|string   List of JIGstdClass items (possibly empty), or a translated error message.
 */
function scrape_youtube_playlist($rss_url, $limit){
    // "+" instead of "*": an empty ID ("...?list=") is treated as undeterminable.
    if (!preg_match('/(?<=list=)[^&#?\s]+/i', $rss_url, $regs)) {
        return __('YouTube playlist ID could not be determined.', 'jig_td');
    }
    $url = 'https://www.youtube.com/playlist?list='.rawurlencode($regs[0]).'&hl=en';
    $host = is_ssl() ? 'https://www.youtube.com' : 'http://www.youtube.com';
    $rss_items = array();

    // NOTE(review): file_get_contents_curl() is defined elsewhere; presumably it
    // returns the response body, or a falsy value on failure - confirm.
    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // loadHTML() rejects empty input (ValueError on PHP 8), so bail out early.
        return $rss_items;
    }

    // Turn non-ASCII characters into numeric entities so libxml does not misread
    // UTF-8. Replaces mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated in PHP 8.2.
    $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

    $doc = new DOMDocument();
    // Collect markup warnings internally instead of silencing everything with @.
    $previous_libxml = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml);
    if (!$loaded) {
        return $rss_items;
    }

    $xpath = new DOMXpath($doc);
    $videos = $xpath->query('//tr[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if($count == $limit){
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // A row without a watch link cannot become an item; skip it instead of crashing.
            continue;
        }

        $title = trim($anchor->nodeValue);
        if($title == "[Private Video]" || $title == "[Deleted Video]"){
            // Placeholder rows are skipped and do not count towards the limit.
            continue;
        }

        $description = '';
        $ownerAnchor = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " pl-video-owner ")]/a', $video)->item(0);
        if ($ownerAnchor instanceof DOMElement) {
            // The owner name and href come from the scraped page: escape them before
            // embedding them in HTML. The " \n" separator matches the original output.
            $description = __('by','jig_td')." \n"
                .'<a href="'.htmlspecialchars($host.$ownerAnchor->getAttribute('href'), ENT_QUOTES, 'UTF-8').'" target="_blank">'
                .htmlspecialchars(trim($ownerAnchor->nodeValue), ENT_QUOTES, 'UTF-8').'</a>';
        }

        $thumbnail = '';
        $image = $xpath->query('.//img', $video)->item(0);
        if ($image instanceof DOMElement) {
            $thumbnail = $image->getAttribute('data-thumb');
            if ($thumbnail === '') {
                // No data-thumb attribute on this image: fall back to its src.
                $thumbnail = $image->getAttribute('src');
            }
        }

        $rss_item = new JIGstdClass();
        $rss_item->get_title = $title;
        $rss_item->get_description = $description;
        $rss_item->get_date = __("No date available.","jig_td");
        $rss_item->get_enclosures = array();
        $rss_item->get_enclosures[] = new JIGstdClass();
        $rss_item->get_enclosures[0]->get_link = str_replace('/default.jpg', '/maxresdefault.jpg', $thumbnail);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}

/**
 * Scrape the uploads of a user channel from its public "videos" page.
 *
 * Accepts legacy feed URLs such as
 * http://gdata.youtube.com/feeds/base/users/MAKO0MAKO0/uploads?max-results=50
 * as well as youtube.com/user/<name> URLs.
 *
 * Each returned item carries get_title, get_description, get_date,
 * get_enclosures[0]->get_link (thumbnail URL) and get_permalink.
 *
 * @param string $rss_url Legacy gdata feed URL or youtube.com/user/ URL.
 * @param int    $limit   Stop after this many items; -1 never matches and so means "all".
 * @return array|string   List of JIGstdClass items (possibly empty), or a translated error message.
 */
function scrape_youtube_channel($rss_url, $limit){
    // The username ends at the next "/", "?", "#" or whitespace, so a trailing
    // query string or extra path segments are not swallowed into the name.
    if (!preg_match('%(?<=/feeds/base/users/)[^/?#\s]+%i', $rss_url, $regs)
        && !preg_match('%(?<=youtube\.com/user/)[^/?#\s]+%i', $rss_url, $regs)) {
        return __('YouTube username could not be determined.', 'jig_td');
    }
    $url = 'https://www.youtube.com/user/'.rawurlencode($regs[0]).'/videos?flow=list&sort=dd&hl=en';
    $host = is_ssl() ? 'https://www.youtube.com' : 'http://www.youtube.com';
    $rss_items = array();

    // NOTE(review): file_get_contents_curl() is defined elsewhere; presumably it
    // returns the response body, or a falsy value on failure - confirm.
    $html = $this->file_get_contents_curl($url);
    if (!is_string($html) || trim($html) === '') {
        // loadHTML() rejects empty input (ValueError on PHP 8), so bail out early.
        return $rss_items;
    }

    // Turn non-ASCII characters into numeric entities so libxml does not misread
    // UTF-8. Replaces mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated in PHP 8.2.
    $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

    $doc = new DOMDocument();
    // Collect markup warnings internally instead of silencing everything with @.
    $previous_libxml = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml);
    if (!$loaded) {
        return $rss_items;
    }

    $xpath = new DOMXpath($doc);
    $videos = $xpath->query('//li[contains(concat(" ", normalize-space(@class), " "), " feed-item-container ")]');
    if (!($videos instanceof DOMNodeList)) {
        return $rss_items;
    }

    $count = 0;
    foreach ($videos as $video) {
        if($count == $limit){
            break;
        }

        $anchor = $xpath->query('.//a[contains(concat(" ", normalize-space(@class), " "), " yt-uix-tile-link ")][starts-with(@href, "/watch")]', $video)->item(0);
        if (!($anchor instanceof DOMElement)) {
            // A container without a watch link cannot become an item; skip it instead of crashing.
            continue;
        }

        // Description and date are optional in the markup; a missing node must not be dereferenced.
        $descriptionNode = $xpath->query('.//div[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-description ")]', $video)->item(0);
        $dateNode = $xpath->query('.//ul[contains(concat(" ", normalize-space(@class), " "), " yt-lockup-meta-info ")]/*[1]', $video)->item(0);

        $thumbnail = '';
        $image = $xpath->query('.//img', $video)->item(0);
        if ($image instanceof DOMElement) {
            $thumbnail = $image->getAttribute('data-thumb');
            if ($thumbnail === '') {
                // No data-thumb attribute on this image: fall back to its src.
                $thumbnail = $image->getAttribute('src');
            }
        }

        $rss_item = new JIGstdClass();
        $rss_item->get_title = trim($anchor->getAttribute('title'));
        $rss_item->get_description = ($descriptionNode !== null) ? trim($descriptionNode->nodeValue) : '';
        // Same fallback message the playlist scraper uses for its date field.
        $rss_item->get_date = ($dateNode !== null) ? $dateNode->nodeValue : __("No date available.","jig_td");
        $rss_item->get_enclosures = array();
        $rss_item->get_enclosures[] = new JIGstdClass();
        $rss_item->get_enclosures[0]->get_link = str_replace('/mqdefault.jpg', '/maxresdefault.jpg', $thumbnail);
        $rss_item->get_permalink = $host.$anchor->getAttribute('href');

        $rss_items[] = $rss_item;
        $count++;
    }

    return $rss_items;
}
Initial URL
http://justifiedgrid.com/
Initial Description
http://stackoverflow.com/questions/29752447/how-to-get-a-youtube-channel-rss-feed-after-2015-april-20-without-v3-api
Initial Title
Improved YouTube scrapers
Initial Tags
Initial Language
PHP