Published in: SAS
The program uses the XML LIBNAME engine, FILENAME URL, SGPLOT procedure, and a simple PROC PRINT to create a report of recent Twitter activity around a specified hashtag.
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
/* This part needs to run just once to establish */
/* the XML map that we'll use to map the Twitter */
/* XML response into a SAS data set              */
filename twsearch temp;

/** this is the XML map that will convert the RSS search feed into a SAS data set **/
/* The DATA step below copies the in-stream lines (up to the ;;;; terminator) */
/* into the temporary file behind the TWSEARCH fileref, one record per line.  */
data _null_;
  infile datalines truncover;
  file twsearch;
  input line $1000.;
  put line;
datalines4;
<?xml version="1.0" encoding="windows-1252"?>
<!-- ############################################################ -->
<!-- 2008-10-03T11:35:31 -->
<!-- SAS XML Libname Engine Map -->
<!-- Generated by XML Mapper, 902000.2.1.20080911191346_v920 -->
<!-- ############################################################ -->
<SXLEMAP name="SXLEMAP" version="1.2">
<!-- ############################################################ -->
<TABLE name="entry">
<TABLE-PATH syntax="XPath">/feed/entry</TABLE-PATH>
<COLUMN name="id">
<PATH syntax="XPath">/feed/entry/id</PATH>
<TYPE>character</TYPE>
<DATATYPE>string</DATATYPE>
<LENGTH>50</LENGTH>
</COLUMN>
<COLUMN name="published">
<PATH syntax="XPath">/feed/entry/published</PATH>
<TYPE>numeric</TYPE>
<DATATYPE>datetime</DATATYPE>
<FORMAT width="19">IS8601DT</FORMAT>
<INFORMAT width="19">IS8601DT</INFORMAT>
</COLUMN>
<COLUMN name="title">
<PATH syntax="XPath">/feed/entry/title</PATH>
<TYPE>character</TYPE>
<DATATYPE>string</DATATYPE>
<LENGTH>200</LENGTH>
</COLUMN>
<COLUMN name="content">
<PATH syntax="XPath">/feed/entry/content</PATH>
<TYPE>character</TYPE>
<DATATYPE>string</DATATYPE>
<LENGTH>800</LENGTH>
</COLUMN>
<COLUMN name="updated">
<PATH syntax="XPath">/feed/entry/updated</PATH>
<TYPE>numeric</TYPE>
<DATATYPE>datetime</DATATYPE>
<FORMAT width="19">IS8601DT</FORMAT>
<INFORMAT width="19">IS8601DT</INFORMAT>
</COLUMN>
<COLUMN name="authorName">
<PATH syntax="XPath">/feed/entry/author/name</PATH>
<TYPE>character</TYPE>
<DATATYPE>string</DATATYPE>
<LENGTH>50</LENGTH>
</COLUMN>
<COLUMN name="authorUri">
<PATH syntax="XPath">/feed/entry/author/uri</PATH>
<TYPE>character</TYPE>
<DATATYPE>string</DATATYPE>
<LENGTH>50</LENGTH>
</COLUMN>
</TABLE>
</SXLEMAP>
;;;;

/** this macro makes it simple to get several "pages" worth of tweets **/
/*                                                                     */
/* Parameters:                                                         */
/*   pages=   number of result pages to request (default 5)            */
/*   hashtag= hashtag to search for, without the leading # (sasgf11)   */
/*   scale=   MINUTES, HOURS or DAYS (any letter case); selects which  */
/*            "ago" variable the bar chart is grouped by. Any other    */
/*            value falls back to HOURS with a warning in the log.     */
/*                                                                     */
/* Output: WORK.FEED (all fetched entries plus hoursAgo, minutesAgo    */
/*         and daysAgo), an SGPLOT bar chart and a PROC PRINT listing. */
/* Requires the TWSEARCH fileref (XML map) assigned above.             */
%macro getTweets(pages=5,hashtag=sasgf11,scale=HOURS);
  /* keep working variables out of the caller's / global symbol table */
  %local scaleVar pgNo feed;

  /* normalise case so scale=days behaves the same as scale=DAYS */
  %let scale = %upcase(&scale);
  %if &scale = HOURS %then %do;
    %let scaleVar = hoursAgo;
  %end;
  %else %if &scale = MINUTES %then %do;
    %let scaleVar = minutesAgo;
  %end;
  %else %if &scale = DAYS %then %do;
    %let scaleVar = daysAgo;
  %end;
  %else %do;
    /* without this branch &scaleVar would be unresolved and SGPLOT would fail */
    %put WARNING: getTweets: unrecognized scale=&scale (expected MINUTES, HOURS or DAYS). Using HOURS.;
    %let scaleVar = hoursAgo;
  %end;

  /* create initial dataset */
  /* (one observation, no variables; that placeholder row is dropped */
  /*  later by the "published not = ." subsetting IF)                */
  data work.feed;
  run;

  %do pgNo=1 %to &pages;
    /* used %NRSTR() to escape the ampersands that occur in this URL query string */
    /* &hashtag and &pgNo are macro variables that are resolved at run time */
    %let feed="http://search.twitter.com/search.atom?lang=en%nrstr(&q)=%23&hashtag.%nrstr(&page)=&pgNo";

    filename twit URL &feed
      /* if you need to specify a proxy server to get to the internet */
      /* proxy="http://your.proxy.com" */
    ;

    /* use the XML library engine */
    libname tf XML xmlfileref=twit xmlmap=twsearch;

    data work.feed;
      /* when run in SAS Enterprise Guide, SYSECHO will */
      /* update the task status with this message */
      sysecho "Fetching tweet page &pgNo of &pages";
      set work.feed tf.entry;
    run;

    /* release the libref first (it depends on the fileref), then the fileref, */
    /* so neither stays assigned after the macro finishes                      */
    libname tf clear;
    filename twit clear;
  %end;

  data work.feed;
    set work.feed;
    length hoursAgo 8 minutesAgo 8 daysAgo 8;
    label hoursAgo   = "Hours ago"
          minutesAgo = "Minutes ago"
          daysAgo    = "Days ago";
    /* keep only rows that carry a published timestamp */
    if published not = .;
    /* NOTE(review): presumably shifts the feed's timestamp to local time; */
    /* gmtoff() is not a documented SAS function - confirm it is available */
    published = published + gmtoff();
    daysAgo    = datdif(datepart(published), today(), 'act/act');
    hoursAgo   = int( (datetime() - published) / 3600 );
    minutesAgo = int( (datetime() - published) / 60 );
  run;

  title "Report of #&hashtag. hashtag activity";
  title2 "as of %TRIM(%QSYSFUNC(DATE(), NLDATE20.)) at %TRIM(%SYSFUNC(TIME(), TIMEAMPM12.))";

  ods graphics / height=500 width=800;

  proc sgplot data=work.feed;
    vbar &scaleVar;
    yaxis LABEL="Number of tweets";
    xaxis discreteorder=data;
  run;

  proc print data=work.feed obs="Row Number" label;
    format published dateampm20.;
    var published authorname title;
  run;
%mend;

/* for high-volume topics, set the scale to MINUTES */
/* or HOURS for more interesting reports */

/* example call to find #SASGF11 tweets */
%getTweets(pages=10, hashtag=sasgf11, scale=DAYS);
URL: http://www.sascommunity.org/wiki/Twitter_SAS_program_example