1 |
#!/usr/bin/python |
#!/usr/bin/python |
2 |
|
|
3 |
import BeautifulSoup |
import copy |
4 |
import urllib2 |
import urllib2 |
5 |
import time |
import time |
6 |
import datetime |
import datetime |
7 |
|
import lxml.html |
8 |
import xml.sax.saxutils |
import xml.sax.saxutils |
9 |
|
|
10 |
def main(): |
def main(): |
11 |
imdb = urllib2.urlopen("http://www.imdb.com/nowplaying/") |
imdb = urllib2.urlopen("http://www.imdb.com/movies-in-theaters/") |
12 |
soup = BeautifulSoup.BeautifulSoup("".join(imdb.readlines())) |
doc = lxml.html.fromstring("".join(imdb.readlines())) |
13 |
moviestuff = soup.findAll("table")[0] |
for element in doc.iter(tag=lxml.etree.Element): |
14 |
moviestuff.table.extract() |
if element.tag.endswith("div"): |
15 |
movies = xml.sax.saxutils.escape(moviestuff.prettify()) |
if element.attrib.get("id", "") == "main": |
16 |
|
break |
17 |
|
new_releases = copy.deepcopy(element) |
18 |
|
movies = xml.sax.saxutils.escape(lxml.html.tostring(new_releases)) |
19 |
print """<?xml version="1.0" encoding="utf-8"?> |
print """<?xml version="1.0" encoding="utf-8"?> |
20 |
<feed xmlns="http://www.w3.org/2005/Atom"> |
<feed xmlns="http://www.w3.org/2005/Atom"> |
21 |
|
|