Return to Snippet

Revision: 18415
at September 30, 2009 15:01 by karlhorky


Updated Code
# Modify XML Feed Items' pubDate to different date format
# Author: Karl Horky
# Date: 29 September 2009
#
# Sample Input:
#   <?xml version="1.0" encoding="UTF-8"?>
#   <rss version="2.0">
#   	<channel>
#   		<title>News</title>
#   		<link>http://www.news.com/</link>
#   		<description>The latest headlines</description>
#   		<language>en-us</language>
#   		<copyright>Copyright © 2009</copyright>
#   		<ttl>5</ttl>
#   		<item>
#       <title>News Item 1</title>
#       <description>Item Description 1</description>
#       <link>http://www.news.com/news_release_1.htm</link>
#       <pubDate>Tue, 29 Sep 2009 17:47:42 GMT</pubDate>
#       </item>
#       
#       <item>
#       <title>News Item 2</title>
#       <description>Item Description 2</description>
#       <link>http://www.news.com/news_release_2.htm</link>
#       <pubDate>Mon, 24 Aug 2009 07:00:00 GMT</pubDate>
#       </item>		
#   	</channel>
#   </rss>
#   
# 
# 
# Sample Output file:
#   <rss version="2.0">
#   	<channel>
#   		<title>News</title>
#   		<link>http://www.news.com/</link>
#   		<description>The latest headlines</description>
#   		<language>en-us</language>
#   		<copyright>Copyright © 2009</copyright>
#   		<ttl>5</ttl>
#   		<item>
#       <title>News Item 1</title>
#       <description>Item Description 1</description>
#       <link>http://www.news.com/news_release_1.htm</link>
#       <pubDate>September 29, 2009</pubDate>
#       </item>
#       
#       <item>
#       <title>News Item 2</title>
#       <description>Item Description 2</description>
#       <link>http://www.news.com/news_release_2.htm</link>
#       <pubDate>August 24, 2009</pubDate>
#       </item>		
#   	</channel>
#   </rss>
#   
#!/usr/local/bin/perl

# Download a remote RSS feed, rewrite the first <pubDate> of every <item> in
# the first <channel> as "Month DD, YYYY", and save the result to a local file.
# Exits with status 1 (after printing the HTTP status) if the download fails.

use strict;
use warnings;

require LWP::UserAgent;
use Data::Dumper;
use XML::Simple;
use Date::Manip;

my $output = 'feed.xml'; # The location of your output file

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
my $response = $ua->get('http://www.example.com/feed.xml'); # The location of the input file

# Say why the download failed instead of exiting silently.
if (!$response->is_success) {
    warn "Cannot fetch feed: " . $response->status_line . "\n";
    exit(1);
}

# Hand the raw bytes to the XML parser so it can honour the encoding named in
# the document's own XML declaration.
my $xml = $response->content;

my $xs = XML::Simple->new(
    keeproot   => 1,
    searchpath => ".",
    forcearray => 1,
    keyattr    => ['key', 'tag'],
);

my $ref = $xs->XMLin($xml, KeepRoot => 1);

# forcearray => 1 makes every element an array ref, hence the ->[0] hops.
# "|| []" keeps a feed without items from growing an empty item list.
foreach my $item (@{ $ref->{rss}->[0]->{channel}->[0]->{item} || [] }) {
    my $dates = $item->{pubDate};

    # Leave items without a plain-text pubDate untouched rather than
    # autovivifying an empty <pubDate> element.
    next unless ref($dates) eq 'ARRAY';
    next unless defined $dates->[0] && !ref $dates->[0];

    # UnixDate returns undef when it cannot parse the date; keep the original
    # text in that case instead of wiping it out.
    my $formatted = UnixDate($dates->[0], "%B %d, %Y");
    if (defined $formatted) {
        $dates->[0] = $formatted;
    }
    else {
        warn "Cannot parse pubDate '$dates->[0]'; leaving it unchanged\n";
    }
}

my $out_xml = $xs->XMLout($ref, KeepRoot => 1);

# XMLout returns a character string and the output carries no XML declaration,
# so readers will assume UTF-8: encode explicitly on the way out.
open(my $out, '>:encoding(UTF-8)', $output) or die "Cannot open file $output: $!\n";
print {$out} $out_xml or die "Cannot write to file $output: $!\n";
close($out) or die "Cannot close file $output: $!\n";

Revision: 18414
at September 30, 2009 14:55 by karlhorky


Updated Code
# Modify XML Feed Items' pubDate to different date format
# Author: Karl Horky
# Date: 29 September 2009
#
# Sample Input:
#   <?xml version="1.0" encoding="UTF-8"?>
#   <rss version="2.0">
#   	<channel>
#   		<title>News</title>
#   		<link>http://www.news.com/</link>
#   		<description>The latest headlines</description>
#   		<language>en-us</language>
#   		<copyright>Copyright © 2009</copyright>
#   		<ttl>5</ttl>
#   		<item>
#       <title>News Item 1</title>
#       <description>Item Description 1</description>
#       <link>http://www.news.com/news_release_1.htm</link>
#       <pubDate>Tue, 29 Sep 2009 17:47:42 GMT</pubDate>
#       </item>
#       
#       <item>
#       <title>News Item 2</title>
#       <description>Item Description 2</description>
#       <link>http://www.news.com/news_release_2.htm</link>
#       <pubDate>Mon, 24 Aug 2009 07:00:00 GMT</pubDate>
#       </item>		
#   	</channel>
#   </rss>
#   
# 
# 
# Sample Output file:
#   <rss version="2.0">
#   	<channel>
#   		<title>News</title>
#   		<link>http://www.news.com/</link>
#   		<description>The latest headlines</description>
#   		<language>en-us</language>
#   		<copyright>Copyright © 2009</copyright>
#   		<ttl>5</ttl>
#   		<item>
#       <title>News Item 1</title>
#       <description>Item Description 1</description>
#       <link>http://www.news.com/news_release_1.htm</link>
#       <pubDate>September 29, 2009</pubDate>
#       </item>
#       
#       <item>
#       <title>News Item 2</title>
#       <description>Item Description 2</description>
#       <link>http://www.news.com/news_release_2.htm</link>
#       <pubDate>August 24, 2009</pubDate>
#       </item>		
#   	</channel>
#   </rss>
#   
#!/usr/local/bin/perl

# Download a remote RSS feed, rewrite the first <pubDate> of every <item> in
# the first <channel> as "Month DD, YYYY", and save the result to a local file.
# Exits with status 1 (after printing the HTTP status) if the download fails.

use strict;
use warnings;

require LWP::UserAgent;
use Data::Dumper;
use XML::Simple;
use Date::Manip;

my $output = 'feed.xml'; # The location of your output file

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
my $response = $ua->get('http://www.example.com/feed.xml'); # The location of the input file

# Say why the download failed instead of exiting silently.
if (!$response->is_success) {
    warn "Cannot fetch feed: " . $response->status_line . "\n";
    exit(1);
}

# Hand the raw bytes to the XML parser so it can honour the encoding named in
# the document's own XML declaration.
my $xml = $response->content;

my $xs = XML::Simple->new(
    keeproot   => 1,
    searchpath => ".",
    forcearray => 1,
    keyattr    => ['key', 'tag'],
);

my $ref = $xs->XMLin($xml, KeepRoot => 1);

# forcearray => 1 makes every element an array ref, hence the ->[0] hops.
# "|| []" keeps a feed without items from growing an empty item list.
foreach my $item (@{ $ref->{rss}->[0]->{channel}->[0]->{item} || [] }) {
    my $dates = $item->{pubDate};

    # Leave items without a plain-text pubDate untouched rather than
    # autovivifying an empty <pubDate> element.
    next unless ref($dates) eq 'ARRAY';
    next unless defined $dates->[0] && !ref $dates->[0];

    # UnixDate returns undef when it cannot parse the date; keep the original
    # text in that case instead of wiping it out.
    my $formatted = UnixDate($dates->[0], "%B %d, %Y");
    if (defined $formatted) {
        $dates->[0] = $formatted;
    }
    else {
        warn "Cannot parse pubDate '$dates->[0]'; leaving it unchanged\n";
    }
}

my $out_xml = $xs->XMLout($ref, KeepRoot => 1);

# XMLout returns a character string and the output carries no XML declaration,
# so readers will assume UTF-8: encode explicitly on the way out.
open(my $out, '>:encoding(UTF-8)', $output) or die "Cannot open file $output: $!\n";
print {$out} $out_xml or die "Cannot write to file $output: $!\n";
close($out) or die "Cannot close file $output: $!\n";

Revision: 18413
at September 29, 2009 17:14 by karlhorky


Initial Code
# Modify XML Feed Items' pubDate to different date format
# Author: Karl Horky
# Date: 29 September 2009
#
# Sample Input:
#   <?xml version="1.0" encoding="UTF-8"?>
#   <rss version="2.0">
#   	<channel>
#   		<title>News</title>
#   		<link>http://www.news.com/</link>
#   		<description>The latest headlines</description>
#   		<language>en-us</language>
#   		<copyright>Copyright © 2009</copyright>
#   		<ttl>5</ttl>
#   		<item>
#       <title>News Item 1</title>
#       <description>Item Description 1</description>
#       <link>http://www2.news.gov.bc.ca/news_releases_2009-2013/2009HSERV0001-000004.htm</link>
#       <pubDate>Tue, 29 Sep 2009 17:47:42 GMT</pubDate>
#       </item>
#       
#       <item>
#       <title>News Item 2</title>
#       <description>Item Description 2</description>
#       <link>http://www2.news.gov.bc.ca/news_releases_2009-2013/2009EDUC0006-000250.htm</link>
#       <pubDate>Mon, 24 Aug 2009 07:00:00 GMT</pubDate>
#       </item>		
#   	</channel>
#   </rss>
#   
# 
# 
# Sample Output file:
#   <rss version="2.0">
#   	<channel>
#   		<title>News</title>
#   		<link>http://www.news.com/</link>
#   		<description>The latest headlines</description>
#   		<language>en-us</language>
#   		<copyright>Copyright © 2009</copyright>
#   		<ttl>5</ttl>
#   		<item>
#       <title>News Item 1</title>
#       <description>Item Description 1</description>
#       <link>http://www2.news.gov.bc.ca/news_releases_2009-2013/2009HSERV0001-000004.htm</link>
#       <pubDate>September 29, 2009</pubDate>
#       </item>
#       
#       <item>
#       <title>News Item 2</title>
#       <description>Item Description 2</description>
#       <link>http://www2.news.gov.bc.ca/news_releases_2009-2013/2009EDUC0006-000250.htm</link>
#       <pubDate>August 24, 2009</pubDate>
#       </item>		
#   	</channel>
#   </rss>
#   
#!/usr/local/bin/perl

# Download a remote RSS feed, rewrite the first <pubDate> of every <item> in
# the first <channel> as "Month DD, YYYY", and save the result to a local file.
# Exits with status 1 (after printing the HTTP status) if the download fails.

use strict;
use warnings;

require LWP::UserAgent;
use Data::Dumper;
use XML::Simple;
use Date::Manip;

my $output = 'feed.xml'; # The location of your output file

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
my $response = $ua->get('http://www.example.com/feed.xml'); # The location of the input file

# Say why the download failed instead of exiting silently.
if (!$response->is_success) {
    warn "Cannot fetch feed: " . $response->status_line . "\n";
    exit(1);
}

# Hand the raw bytes to the XML parser so it can honour the encoding named in
# the document's own XML declaration.
my $xml = $response->content;

my $xs = XML::Simple->new(
    keeproot   => 1,
    searchpath => ".",
    forcearray => 1,
    keyattr    => ['key', 'tag'],
);

my $ref = $xs->XMLin($xml, KeepRoot => 1);

# forcearray => 1 makes every element an array ref, hence the ->[0] hops.
# "|| []" keeps a feed without items from growing an empty item list.
foreach my $item (@{ $ref->{rss}->[0]->{channel}->[0]->{item} || [] }) {
    my $dates = $item->{pubDate};

    # Leave items without a plain-text pubDate untouched rather than
    # autovivifying an empty <pubDate> element.
    next unless ref($dates) eq 'ARRAY';
    next unless defined $dates->[0] && !ref $dates->[0];

    # UnixDate returns undef when it cannot parse the date; keep the original
    # text in that case instead of wiping it out.
    my $formatted = UnixDate($dates->[0], "%B %d, %Y");
    if (defined $formatted) {
        $dates->[0] = $formatted;
    }
    else {
        warn "Cannot parse pubDate '$dates->[0]'; leaving it unchanged\n";
    }
}

my $out_xml = $xs->XMLout($ref, KeepRoot => 1);

# XMLout returns a character string and the output carries no XML declaration,
# so readers will assume UTF-8: encode explicitly on the way out.
open(my $out, '>:encoding(UTF-8)', $output) or die "Cannot open file $output: $!\n";
print {$out} $out_xml or die "Cannot write to file $output: $!\n";
close($out) or die "Cannot close file $output: $!\n";

Initial URL


Initial Description
Read in a remote XML file, change each item's pubDate to a different date format, and write the result to a specified local file.

Initial Title
Perl: Modify XML Feed Items' pubDate

Initial Tags
date, replace, xml

Initial Language
Perl