Return to Snippet

Revision: 6813
at June 16, 2008 21:48 by tommorris


Updated Code
require 'rexml/document'

class REXML::Element

  # Effective language of this element, following xml:lang inheritance.
  #
  # An xml:lang attribute on the element itself wins; otherwise the
  # question is delegated to the parent, and so on up the tree.
  #
  # Returns the xml:lang value as a String, or nil when neither this
  # element nor any of its ancestors carries the attribute.
  def lang
    declared = attributes['xml:lang']
    return declared.to_s if declared

    # No declaration here: inherit from the parent when there is one.
    parent.lang if parent
  end

end

describe "XML library" do
  # One example covering three documents: xml:lang declared on the root and
  # read from the child, no xml:lang anywhere, and xml:lang declared on the
  # child itself.
  it "should handle xml:lang inheritance properly" do
    # Parses the given source and reports the language of the first child
    # of the first top-level element.
    first_child_lang = lambda do |source|
      REXML::Document.new(source).elements[1].elements[1].lang
    end

    # NOTE(review): `<<-` keeps the leading indentation, so the XML
    # declaration is preceded by whitespace in each document below.
    declared_on_root = <<-EOF
    <?xml version="1.0" ?>
    <foo xml:lang="en">
      <bar>Hello World!</bar>
    </foo>
    EOF

    first_child_lang.call(declared_on_root).should == "en"

    declared_nowhere = <<-EOF
    <?xml version="1.0" ?>
    <foo>
      <bar>Hello World!</bar>
    </foo>
    EOF

    missing = first_child_lang.call(declared_nowhere)
    missing.should_not == "en"
    missing.should == nil

    declared_on_child = <<-EOF
    <?xml version="1.0" ?>
    <foo>
      <bar xml:lang="en">Hello World!</bar>
    </foo>
    EOF

    first_child_lang.call(declared_on_child).should == "en"
  end
end

Revision: 6812
at June 16, 2008 21:42 by tommorris


Updated Code
require 'rexml/document'

class REXML::Element
  # Effective language of this element.
  #
  # Returns the element's own xml:lang value as a String when present;
  # otherwise whatever recurse_upwards_to_find_lang finds among the
  # ancestors (nil when nothing is found).
  def lang
    own = attributes['xml:lang']
    own ? own.to_s : recurse_upwards_to_find_lang(self)
  end

  protected

  # Looks for xml:lang on the ancestors of +el+ (not on +el+ itself),
  # nearest first.
  #
  # el - the node whose ancestors are inspected.
  #
  # Returns the first xml:lang value found as a String, or nil when the
  # top of the tree is reached without finding one.
  def recurse_upwards_to_find_lang(el)
    ancestor = el.parent
    # NOTE(review): `parent?` is kept from the original; in REXML it appears
    # to report whether a node is a container rather than whether it has a
    # parent - confirm before relying on it.
    return nil if ancestor.nil? || !el.parent?

    inherited = ancestor.attributes['xml:lang']
    return inherited.to_s if inherited

    recurse_upwards_to_find_lang(ancestor)
  end

end

describe "XML library" do
  # One example covering three documents: xml:lang declared on the root and
  # read from the child, no xml:lang anywhere, and xml:lang declared on the
  # child itself.
  it "should handle xml:lang inheritance properly" do
    # Parses the given source and reports the language of the first child
    # of the first top-level element.
    first_child_lang = lambda do |source|
      REXML::Document.new(source).elements[1].elements[1].lang
    end

    # NOTE(review): `<<-` keeps the leading indentation, so the XML
    # declaration is preceded by whitespace in each document below.
    declared_on_root = <<-EOF
    <?xml version="1.0" ?>
    <foo xml:lang="en">
      <bar>Hello World!</bar>
    </foo>
    EOF

    first_child_lang.call(declared_on_root).should == "en"

    declared_nowhere = <<-EOF
    <?xml version="1.0" ?>
    <foo>
      <bar>Hello World!</bar>
    </foo>
    EOF

    first_child_lang.call(declared_nowhere).should_not == "en"

    declared_on_child = <<-EOF
    <?xml version="1.0" ?>
    <foo>
      <bar xml:lang="en">Hello World!</bar>
    </foo>
    EOF

    first_child_lang.call(declared_on_child).should == "en"
  end
end

Revision: 6811
at June 16, 2008 21:30 by tommorris


Initial Code
require 'rexml/document'

class REXML::Element
  public

  # Effective language of this element, following xml:lang inheritance.
  #
  # Returns the element's own xml:lang value as a String when present;
  # otherwise the value found on the nearest ancestor, or nil when no
  # ancestor declares one.
  def lang
    own = attributes['xml:lang']
    # The stray call to the undefined `prints` was removed: it raised
    # NoMethodError for every element carrying its own xml:lang.
    return own.to_s if own

    recurse_upwards_to_find_lang(self)
  end

  protected

  # Looks for xml:lang on the ancestors of +el+ (not on +el+ itself),
  # nearest first.
  #
  # el - the node whose ancestors are inspected.
  #
  # Returns the first xml:lang value found as a String, or nil when the
  # top of the tree is reached without finding one.
  def recurse_upwards_to_find_lang(el)
    ancestor = el.parent
    # A node without a parent has nothing to inherit from; checking the
    # parent itself avoids calling `attributes` on nil.
    return nil if ancestor.nil?

    inherited = ancestor.attributes['xml:lang']
    return inherited.to_s if inherited

    # Continue from the parent. Recursing on +el+ again would examine the
    # same parent forever.
    recurse_upwards_to_find_lang(ancestor)
  end
end

describe "XML library" do
  # Checks that the first top-level element reports the xml:lang value
  # declared on it.
  it "should handle xml:lang inheritance properly" do
    # NOTE(review): `<<-` keeps the leading indentation, so the XML
    # declaration is preceded by whitespace in the document below.
    declared_on_root = <<-EOF
    <?xml version="1.0" ?>
    <foo xml:lang="en">
      <bar>Hello World!</bar>
    </foo>
    EOF

    parsed = REXML::Document.new(declared_on_root)
    top_element = parsed.elements[1]
    top_element.lang.should == "en"
  end
end

Initial URL

                                

Initial Description
REXML does not seem to have a 'lang' method, which is strange since xml:lang is defined in the XML 1.0 Specification §2.12 'Language Identification' and is supported by many other libraries. Thanks to Ruby's 'monkey patching', it's pretty easy to add: you just walk up the ancestors recursively until one of them declares xml:lang. Alas, because attributes are stored internally as strings and not as Attribute objects, there does not seem to be any way of monkey patching §2.12 support onto REXML's attribute handling.

One day, Ruby will have a really good XML parser - speedy as a SAX and fully DOMinating over the vagaries of namespaces etc.

Enclosed is an RSpec test that demonstrates usage.

Initial Title
REXML Language Identification monkey patch

Initial Tags
ruby, xml

Initial Language
Ruby