/[svn.andrew.net.au]/scripts/movies.py
ViewVC logotype

Contents of /scripts/movies.py

Parent Directory | Revision Log


Revision 53 - (show annotations)
Mon Mar 19 04:32:36 2012 UTC (10 years, 6 months ago) by apollock
File MIME type: text/x-python
File size: 1627 byte(s)
Do another pass to exclude the weekly top 10

1 #!/usr/bin/python
2
3 import copy
4 import urllib2
5 import time
6 import datetime
7 import lxml.html
8 import xml.sax.saxutils
9
# CSS class strings passed one at a time to find_class() in main(); every
# element found that way is dropped from the extracted listing before it is
# serialised into the feed.
crufty_classes = [
    "rating rating-list",
    "add_to_watchlist",
    "see-more",
    "rating_txt",
    "header",
]
17
# Atom document printed by main().  The %(...)s placeholders are filled in
# with the current UTC time, a Unix timestamp and the escaped HTML listing.
# NOTE(review): any leading whitespace the template lines originally had is
# not recoverable from this view; lines are written flush-left here.
_FEED_TEMPLATE = """<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">

<link href="http://home.andrew.net.au/~apollock/movies.xml" rel="self"/>

<title>This week's movies from IMDb</title>
<updated>%(updated)sZ</updated>
<author>
<name>Andrew Pollock</name>
</author>
<id>http://www.andrew.net.au/</id>

<entry>
<id>http://home.andrew.net.au/movies/%(timestamp)s</id>

<updated>%(updated)sZ</updated>
<title>This week's movies</title>

<content type="html">
%(movies)s
</content>
</entry>
</feed>
"""


def _first_element(root, matches, description):
    """Return the first element at or below *root* accepted by *matches*.

    Elements are visited in document order, starting with *root* itself.
    Passing ``lxml.etree.Element`` as the tag filter limits the walk to real
    elements, so comments and processing instructions are never handed to
    *matches*.

    Raises:
        LookupError: if no element is accepted.  *description* is included
            in the message to say what was being looked for.
    """
    for element in root.iter(tag=lxml.etree.Element):
        if matches(element):
            return element
    raise LookupError("could not find %s in the IMDb page" % description)


def main():
    """Fetch IMDb's in-theaters page and print it as a one-entry Atom feed.

    The listing is taken from the first element whose class attribute is
    exactly "list detail sub-list" inside the <div id="main"> container.
    Elements carrying any of the classes in ``crufty_classes`` are removed,
    and the remaining markup is XML-escaped and embedded as the entry's
    HTML content.  The feed is written to standard output.

    Raises:
        LookupError: if the expected container or listing element is not
            present in the page, instead of silently publishing whatever
            element happened to be visited last.
    """
    imdb = urllib2.urlopen("http://www.imdb.com/movies-in-theaters/")
    try:
        page = imdb.read()
    finally:
        # Release the HTTP connection even if the read fails.
        imdb.close()
    doc = lxml.html.fromstring(page)

    main_div = _first_element(
        doc,
        lambda el: el.tag.endswith("div") and el.attrib.get("id", "") == "main",
        'the <div id="main"> container')
    # Go deeper to ditch the Box Office Top Ten
    listing = _first_element(
        main_div,
        lambda el: el.attrib.get("class", "") == "list detail sub-list",
        'the "list detail sub-list" element')

    # Work on a detached copy so pruning does not touch the parsed document.
    new_releases = copy.deepcopy(listing)
    for crufty_class in crufty_classes:
        for cruft in new_releases.find_class(crufty_class):
            cruft.drop_tree()

    # The markup is escaped because the entry declares <content type="html">.
    movies = xml.sax.saxutils.escape(lxml.html.tostring(new_releases))

    # Single parenthesised argument: prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(_FEED_TEMPLATE % {
        # isoformat() may carry microseconds; keep "YYYY-MM-DDTHH:MM:SS" only.
        'updated': datetime.datetime.utcnow().isoformat()[0:19],
        'movies': movies,
        'timestamp': int(time.time()),
    })
59
# Run the feed generator only when this file is executed as a script.
if __name__ == "__main__":
    main()

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.22