RSS Parser (SAX)
RSS (Really Simple Syndication)
RSS is a way to publish frequently changing content such as blog posts, news updates, stock quotes and the like. An RSS document, which is called a “feed,” “web feed,” or “channel,” contains either a summary of content from an associated web site or the full text. RSS formats are specified using XML, a generic specification for the creation of data formats.
I have attached a simple SAX parser for RSS. Please let me know if there is any flaw in the attached code. This code is provided for learning purposes, with less focus on coding standards and efficiency. You are free to use and modify it.
package subin.rnd.xml;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Minimal SAX-based RSS reader.
 *
 * <p>Construct it with a feed URL, call {@link #parse()}, then read the result
 * through {@link #getFeed()}. The class is its own SAX handler: element text is
 * collected in a buffer and copied into the current {@link Item}, the channel
 * image fields or the channel itself when the element closes.
 *
 * <p>An instance keeps mutable parsing state in plain fields without any
 * synchronization, so it must not be shared between threads while parsing.
 */
public class RssParser extends DefaultHandler
{
    /**
     * Proxy host to install before connecting, or {@code null} to leave the
     * JVM proxy settings untouched. Set this (and {@link #PROXY_PORT}) only
     * when the feed has to be fetched through a proxy.
     */
    private static final String PROXY_HOST = null;

    /** Proxy port matching {@link #PROXY_HOST}, or {@code null} for none. */
    private static final String PROXY_PORT = null;

    private String urlString;
    private RssFeed rssFeed;
    private StringBuilder text;
    private Item item;
    private boolean imgStatus;
    private boolean textInputStatus;

    /**
     * Creates a parser for the given feed location.
     *
     * @param url address of the RSS document; it is only opened by {@link #parse()}
     */
    public RssParser(String url)
    {
        this.urlString = url;
        this.text = new StringBuilder();
    }

    /**
     * Downloads and parses the feed.
     *
     * <p>This method is best-effort: any failure (malformed URL, I/O error,
     * parser configuration problem, malformed XML) is printed to standard
     * output together with its stack trace and is not propagated. Whatever
     * was parsed before the failure stays available through {@link #getFeed()}.
     */
    public void parse()
    {
        InputStream urlInputStream = null;

        // Drop per-document state so an earlier, aborted run cannot leak into this one.
        this.text.setLength(0);
        this.item = null;
        this.imgStatus = false;
        this.textInputStatus = false;

        try
        {
            URL url = new URL(this.urlString);
            _setProxy(); // Set the proxy if needed
            urlInputStream = url.openConnection().getInputStream();

            SAXParserFactory spf = SAXParserFactory.newInstance();
            hardenFactory(spf);
            SAXParser sp = spf.newSAXParser();
            sp.parse(urlInputStream, this);
        }
        /*
         * Exceptions need to be handled
         * MalformedURLException
         * ParserConfigurationException
         * IOException
         * SAXException
         */
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
            e.printStackTrace();
        }
        finally
        {
            try
            {
                if (urlInputStream != null) urlInputStream.close();
            }
            catch (Exception ignored)
            {
                // Nothing useful can be done if closing the stream fails.
            }
        }
    }

    /**
     * Turns off external entity and external DTD resolution, because the
     * document comes from a remote server and must not be able to make the
     * parser read local files or contact other hosts.
     */
    private static void hardenFactory(SAXParserFactory factory)
    {
        String[] disabledFeatures = {
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd"
        };
        for (int i = 0; i < disabledFeatures.length; i++)
        {
            try
            {
                factory.setFeature(disabledFeatures[i], false);
            }
            catch (Exception e)
            {
                // A parser may not know a given feature; report it and keep going.
                System.out.println("XML feature not supported: " + disabledFeatures[i]);
            }
        }
    }

    /**
     * Returns the feed built by the last {@link #parse()} call.
     *
     * @return the parsed feed, or {@code null} if no {@code channel} element
     *         has been seen yet
     */
    public RssFeed getFeed()
    {
        return (this.rssFeed);
    }

    /**
     * Tracks which container element is being entered: a {@code channel}
     * starts a new feed, an {@code item} starts a new entry, and
     * {@code image} / {@code textInput} switch the context for the
     * title/link/description elements nested inside them.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
                             Attributes attributes)
    {
        if (qName.equalsIgnoreCase("channel"))
            this.rssFeed = new RssFeed();
        else if (qName.equalsIgnoreCase("item") && (this.rssFeed != null))
        {
            this.item = new Item();
            this.rssFeed.addItem(this.item);
        }
        else if (qName.equalsIgnoreCase("image") && (this.rssFeed != null))
            this.imgStatus = true;
        else if (qName.equalsIgnoreCase("textInput") && (this.rssFeed != null))
            this.textInputStatus = true;
    }

    /**
     * Stores the text collected for the element that just closed and then
     * empties the text buffer. Elements seen before any {@code channel} are
     * ignored, but the buffer is still emptied for them.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
    {
        if (this.rssFeed != null)
            storeElement(qName, this.text.toString().trim());
        this.text.setLength(0);
    }

    /** Copies {@code value} into the field that {@code qName} maps to in the current context. */
    private void storeElement(String qName, String value)
    {
        if (qName.equalsIgnoreCase("item"))
            this.item = null;
        else if (qName.equalsIgnoreCase("image"))
            this.imgStatus = false;
        else if (qName.equalsIgnoreCase("textInput"))
            this.textInputStatus = false;
        else if (qName.equalsIgnoreCase("title"))
        {
            if (this.item != null) this.item.title = value;
            else if (this.imgStatus) this.rssFeed.imageTitle = value;
            // A textInput title describes the input box, not the channel.
            else if (!this.textInputStatus) this.rssFeed.title = value;
        }
        else if (qName.equalsIgnoreCase("link"))
        {
            if (this.item != null) this.item.link = value;
            else if (this.imgStatus) this.rssFeed.imageLink = value;
            else if (!this.textInputStatus) this.rssFeed.link = value;
        }
        else if (qName.equalsIgnoreCase("description"))
        {
            if (this.item != null) this.item.description = value;
            // Descriptions nested in image/textInput must not replace the channel's own.
            else if (!this.imgStatus && !this.textInputStatus) this.rssFeed.description = value;
        }
        else if (qName.equalsIgnoreCase("url") && this.imgStatus)
            this.rssFeed.imageUrl = value;
        else if (qName.equalsIgnoreCase("language"))
            this.rssFeed.language = value;
        else if (qName.equalsIgnoreCase("generator"))
            this.rssFeed.generator = value;
        else if (qName.equalsIgnoreCase("copyright"))
            this.rssFeed.copyright = value;
        else if (qName.equalsIgnoreCase("pubDate") && (this.item != null))
            this.item.pubDate = value;
        else if (qName.equalsIgnoreCase("category") && (this.item != null))
            this.rssFeed.addItem(value, this.item);
    }

    /** Appends a chunk of character data to the buffer of the current element. */
    @Override
    public void characters(char[] ch, int start, int length)
    {
        this.text.append(ch, start, length);
    }

    /**
     * Installs the proxy configured in {@link #PROXY_HOST} and
     * {@link #PROXY_PORT} as JVM system properties. Does nothing while either
     * constant is {@code null}, so a direct connection is used by default.
     *
     * @throws IOException declared for callers; the current body does not throw it
     */
    public static void _setProxy()
    throws IOException
    {
        if (PROXY_HOST == null || PROXY_PORT == null)
            return;
        System.setProperty("proxyHost", PROXY_HOST);
        System.setProperty("proxyPort", PROXY_PORT);
    }

    /**
     * Channel-level data of a feed plus the items found in it.
     */
    public static class RssFeed
    {
        public String title;
        public String description;
        public String link;
        public String language;
        public String generator;
        public String copyright;
        public String imageUrl;
        public String imageTitle;
        public String imageLink;
        // Both collections are created lazily on the first add.
        private ArrayList <Item> items;
        private HashMap <String, ArrayList <Item>> category;

        /**
         * Appends an item to the feed.
         *
         * @param item the item to add
         */
        public void addItem(Item item)
        {
            if (this.items == null)
                this.items = new ArrayList<Item>();
            this.items.add(item);
        }

        /**
         * Files an item under a category name.
         *
         * @param category category name used as the map key
         * @param item the item to list under that category
         */
        public void addItem(String category, Item item)
        {
            if (this.category == null)
                this.category = new HashMap<String, ArrayList<Item>>();
            if (!this.category.containsKey(category))
                this.category.put(category, new ArrayList<Item>());
            this.category.get(category).add(item);
        }

        /**
         * Returns the items of the feed in the order they were added.
         *
         * @return a read-only view of the items; empty when none were added
         */
        public List<Item> getItems()
        {
            if (this.items == null)
                return Collections.<Item>emptyList();
            return Collections.unmodifiableList(this.items);
        }

        /**
         * Returns the items grouped by category name.
         *
         * @return a read-only view of the category map; empty when no item
         *         carried a category. The lists inside the map are the live
         *         internal lists.
         */
        public Map<String, ArrayList<Item>> getCategories()
        {
            if (this.category == null)
                return Collections.<String, ArrayList<Item>>emptyMap();
            return Collections.unmodifiableMap(this.category);
        }
    }

    /**
     * One entry of a feed.
     */
    public static class Item
    {
        public String title;
        public String description;
        public String link;
        public String pubDate;

        /**
         * Formats the item as {@code "title: pubDate"} followed by the
         * description on the next line.
         */
        @Override
        public String toString()
        {
            return (this.title + ": " + this.pubDate + "\n" + this.description);
        }
    }
}
Using RssParser.java :
// Fetch and parse the feed. parse() reports failures on standard output
// instead of throwing, so the result has to be checked afterwards.
RssParser rp = new RssParser("<RSS Feed URL>");
rp.parse();
RssFeed feed = rp.getFeed();
// getFeed() returns null when parsing failed or no <channel> element was found.
// NOTE: items and category are private members of RssFeed, so this snippet
// only compiles when placed inside RssParser (e.g. in a main method there).
if (feed != null)
{
    // Listing all categories & the no. of elements in each category
    if (feed.category != null)
    {
        System.out.println("Category List: ");
        for (String category : feed.category.keySet())
        {
            System.out.println(category
                + ": "
                + feed.category.get(category).size());
        }
    }
    // Listing all items in the feed (items stays null for a feed without items)
    if (feed.items != null)
    {
        for (int i = 0; i < feed.items.size(); i++)
            System.out.println(feed.items.get(i).title);
    }
}


thank you!! I need it!
sciafranz
September 9, 2008 at 9:20 pm
My pleasure
Subinkrishna G
September 10, 2008 at 10:22 am
This is a very basic RSS parser. My intention was to write a parser for J2ME-enabled mobile devices. That's why I wrote it with SAX. By changing some of the Collection objects used in the code we can use it in J2ME (e.g. HashMap to Hashtable).
But I personally prefer to use JDOM parser for all XML parsing needs. JDOM is again SAX based and gives us a DOM-like document object. And it’s very simple to use too. Find more about it here: http://www.jdom.org
I will try to post a JDOM based parser soon.
Subinkrishna G
January 22, 2009 at 12:55 pm