# Content-extraction rules, one `directive: value` per line; values are XPath
# expressions except for test_url.
# NOTE(review): looks like a Full-Text RSS style site config for newyorker.com
# (judging by the test_url entries) — confirm against the consuming tool.

# Title: the <h1> with id 'articlehed', or the <h2> with id 'articleintro'.
title: //h1[@id='articlehed'] | //h2[@id="articleintro"]
# Body: the <div> with id 'articleBody' anywhere inside an <article>.
body: //article//div[@id='articleBody']

# Author: links inside a <p> whose class contains 'Byline', within a <header>.
author: //header//p[contains(@class, 'Byline')]//a

# Date: taken from the <meta name="pubdate"> tag. The first rule reads the
# non-standard `value` attribute and is kept for backward compatibility; the
# second reads `content`, the attribute HTML defines for <meta> data.
# NOTE(review): assumes the consumer accepts repeated `date:` lines as
# alternatives — confirm.
date: //meta[@name='pubdate']/@value
date: //meta[@name='pubdate']/@content

# Single-page link: the anchor directly inside the 'paginationViewSinglePage' div.
single_page_link: //div[@class='paginationViewSinglePage']/a
# Sample article URLs for exercising the rules above.
test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html
test_url: http://www.newyorker.com/reporting/2013/04/22/130422fa_fact_bilger?currentPage=all&mobify=0