olauzanne / pyquery (http://pypi.python.org/pypi/pyquery)
A jquery-like library for python
Clone this repository (size: 130.4 KB): HTTPS / SSH
$ hg clone http://bitbucket.org/olauzanne/pyquery/
| commit 105: | ca983856d2a8 |
| parent 104: | 57314579ad88 |
| branch: | default |
Added tag 0.4 for changeset 57314579ad88
| # Introduced |
|---|
1 |
b195c88d1d6a |
#-*- coding:utf-8 -*- |
2 |
b195c88d1d6a |
# |
3 |
b195c88d1d6a |
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
4 |
b195c88d1d6a |
# |
5 |
b195c88d1d6a |
# Distributed under the BSD license, see LICENSE.txt |
6 |
85b0ae322634 |
from cssselectpatch import selector_to_xpath |
7 |
85b0ae322634 |
from lxml import etree |
8 |
a8fc828d5ac0 |
import lxml.html |
9 |
b195c88d1d6a |
from copy import deepcopy |
10 |
e438752b3e14 |
from urlparse import urljoin |
11 |
b195c88d1d6a |
|
12 |
b45eaa021356 |
def fromstring(context, parser=None):
    """Parse the markup string ``context`` into a list of elements.

    ``parser`` selects how the string is parsed:

    * ``None`` -- try ``etree.fromstring`` (strict XML) first and fall
      back to ``lxml.html.fromstring`` if the XML parser reports a
      syntax error.
    * ``'xml'`` -- ``etree.fromstring`` only.
    * ``'html'`` -- ``lxml.html.fromstring`` only.
    * ``'html_fragments'`` -- ``lxml.html.fragments_fromstring``; its
      return value is passed through unchanged.

    Raises ``ValueError`` for any other ``parser`` value.
    """
    if parser is None:
        # use html parser if we don't have clean xml
        try:
            return [etree.fromstring(context)]
        except etree.XMLSyntaxError:
            return [lxml.html.fromstring(context)]
    elif parser == 'xml':
        return [etree.fromstring(context)]
    elif parser == 'html':
        return [lxml.html.fromstring(context)]
    elif parser == 'html_fragments':
        return lxml.html.fragments_fromstring(context)
    else:
        # The exception used to be built but never raised, so an unknown
        # parser name silently produced ``None``.
        raise ValueError('No such parser: "%s"' % parser)
28 |
3c4766cfda86 |
|
29 |
817e3b5c8b58 |
class NoDefault(object):
    """Type of the ``no_default`` sentinel.

    The sentinel marks "argument not supplied", which lets callers pass
    ``None`` as a real value.
    """

    def __repr__(self):
        """clean representation in Sphinx"""
        return '<NoDefault>'


# Create the single sentinel instance, then remove the class name from
# the module namespace.
no_default = NoDefault()
del NoDefault
36 |
480986cd29f2 |
|
37 |
85b9e5c80e08 |
class FlexibleElement(object):
    """property to allow a flexible api

    Descriptor wrapping a getter (``pget``) and optional setter
    (``pset``) and deleter (``pdel``).  Reading the attribute on an
    instance returns a small proxy object, so the same name can be
    called, indexed or accessed by attribute.
    """

    def __init__(self, pget, pset=no_default, pdel=no_default):
        self.pget = pget
        self.pset = pset
        self.pdel = pdel

    def __get__(self, instance, klass):
        descriptor = self

        class _element(object):
            """real element to support set/get/del attr and item and js call
            style"""

            def __call__(prop, *args, **kwargs):
                return descriptor.pget(instance, *args, **kwargs)

            # Calling, attribute access, item access and both kinds of
            # assignment all go through the getter.
            __getattr__ = __getitem__ = __setattr__ = __setitem__ = __call__

            def __delitem__(prop, name):
                if descriptor.pdel is no_default:
                    raise NotImplementedError()
                return descriptor.pdel(instance, name)

            __delattr__ = __delitem__

            def __repr__(prop):
                return '<flexible_element %s>' % descriptor.pget.func_name

        return _element()

    def __set__(self, instance, value):
        if self.pset is no_default:
            raise NotImplementedError()
        self.pset(instance, value)
64 |
b195c88d1d6a |
|
65 |
25fd6a1210de |
class PyQuery(list): |
66 |
817e3b5c8b58 |
"""The main class |
67 |
817e3b5c8b58 |
""" |
68 |
25fd6a1210de |
def __init__(self, *args, **kwargs):
    """Build the element list from markup, a file, a URL or nodes.

    Positional forms:

    * ``PyQuery(context)`` -- ``context`` is a markup string, another
      instance of this class, a list, or a single ``etree`` element.
    * ``PyQuery(selector, context)`` -- as above, then keep only the
      nodes that the CSS ``selector`` matches under ``context``.
    * ``PyQuery('http://...')`` or ``PyQuery('https://...')`` --
      shorthand for ``PyQuery(url=...)``.

    Keyword arguments:

    * ``parser`` -- forwarded to ``fromstring``.
    * ``parent`` -- stored as ``self._parent`` (returned by ``end()``).
    * ``filename`` -- read the markup from this file.
    * ``url`` -- fetch the markup from this URL; an optional ``opener``
      callable is called with the URL and must return the markup.

    Raises ``ValueError`` for unknown keyword arguments, for a wrong
    number of positional arguments, or when a markup string cannot be
    parsed.
    """
    html = None
    elements = []
    self._base_url = None
    parser = kwargs.pop('parser', None)

    # A lone URL string is shorthand for the ``url`` keyword.
    if not kwargs and len(args) == 1 \
            and isinstance(args[0], basestring) \
            and args[0].startswith(('http://', 'https://')):
        kwargs = {'url': args[0]}
        args = []

    self._parent = kwargs.pop('parent', no_default)

    if kwargs:
        # specific case to get the dom
        if 'filename' in kwargs:
            # Close the file explicitly instead of leaving it to the
            # garbage collector.
            source = open(kwargs['filename'])
            try:
                html = source.read()
            finally:
                source.close()
        elif 'url' in kwargs:
            url = kwargs.pop('url')
            if 'opener' in kwargs:
                opener = kwargs.pop('opener')
                html = opener(url)
            else:
                from urllib2 import urlopen
                response = urlopen(url)
                try:
                    html = response.read()
                finally:
                    response.close()
            self._base_url = url
        else:
            raise ValueError('Invalid keyword arguments %s' % kwargs)
        elements = fromstring(html, parser)
    else:
        # determine context and selector if any
        selector = context = no_default
        if len(args) == 1:
            context = args[0]
        elif len(args) == 2:
            selector, context = args
        else:
            raise ValueError("You can't do that." +
                             " Please, provide arguments")

        # get context
        if isinstance(context, basestring):
            try:
                elements = fromstring(context, parser)
            except Exception as e:
                raise ValueError('%r, %s' % (e, context))
        elif isinstance(context, self.__class__):
            # copy
            elements = context[:]
        elif isinstance(context, list):
            elements = context
        elif isinstance(context, etree._Element):
            elements = [context]

        # select nodes
        if elements and selector is not no_default:
            xpath = selector_to_xpath(selector)
            results = [tag.xpath(xpath) for tag in elements]
            # Flatten the results
            elements = []
            for r in results:
                elements.extend(r)

    list.__init__(self, elements)
139 |
b195c88d1d6a |
|
140 |
0de160a2021f |
def __call__(self, *args):
    """return a new PyQuery instance

    A single argument that does not start with ``'<'`` is treated as a
    selector and is evaluated against ``self``; anything else is passed
    to the constructor as given.  The new instance gets ``self`` as its
    parent.  Raises ``ValueError`` when called without arguments.
    """
    if len(args) == 0:
        raise ValueError('You must provide at least a selector')
    if len(args) == 1 and not args[0].startswith('<'):
        args = args + (self,)
    return self.__class__(*args, parent=self)
150 |
cb35449d0a5e |
|
151 |
f044bd088360 |
# keep original list api prefixed with _ |
152 |
f044bd088360 |
_append = list.append |
153 |
f044bd088360 |
_extend = list.extend |
154 |
f044bd088360 |
|
155 |
f044bd088360 |
# improve pythonic api |
156 |
f044bd088360 |
def __add__(self, other):
    """Return a new instance holding the items of ``self`` then ``other``.

    ``other`` must be an instance of the same class.
    """
    cls = self.__class__
    assert isinstance(other, cls)
    combined = self[:]
    combined.extend(other[:])
    return cls(combined)
159 |
f044bd088360 |
|
160 |
f044bd088360 |
def extend(self, other):
    """Append the items of ``other`` (same class as ``self``) in place."""
    assert isinstance(other, self.__class__)
    snapshot = other[:]
    self._extend(snapshot)
163 |
f044bd088360 |
|
164 |
b195c88d1d6a |
def __str__(self):
    """xml representation of current nodes::

        >>> xml = PyQuery('<script><![[CDATA[ ]></script>', parser='html_fragments')
        >>> print str(xml)
        <script>&lt;![[CDATA[ ]&gt;</script>

    """
    serialized = []
    for node in self:
        serialized.append(etree.tostring(node))
    return ''.join(serialized)
173 |
b195c88d1d6a |
|
174 |
4d28c8de8bf0 |
def __html__(self):
    """html representation of current nodes::

        >>> html = PyQuery('<script><![[CDATA[ ]></script>', parser='html_fragments')
        >>> print html.__html__()
        <script><![[CDATA[ ]></script>

    """
    serialized = []
    for node in self:
        serialized.append(lxml.html.tostring(node))
    return ''.join(serialized)
183 |
4d28c8de8bf0 |
|
184 |
b195c88d1d6a |
def __repr__(self):
    """Show each node as ``<tag#id.class1.class2>`` inside brackets.

    Falls back to the plain ``list`` representation when an item does
    not offer the element interface (``get`` / ``tag``).
    """
    labels = []
    try:
        for node in self:
            css_class = node.get('class')
            if css_class:
                class_part = '.' + '.'.join(css_class.split(' '))
            else:
                class_part = ''
            node_id = node.get('id')
            if node_id:
                id_part = '#' + node_id
            else:
                id_part = ''
            labels.append('<%s%s%s>' % (node.tag, id_part, class_part))
    except AttributeError:
        return list.__repr__(self)
    return '[' + ', '.join(labels) + ']'
196 |
4cd95721e68f |
|
197 |
b195c88d1d6a |
|
198 |
b195c88d1d6a |
############## |
199 |
0387197e63da |
# Traversing # |
200 |
0387197e63da |
############## |
201 |
0387197e63da |
|
202 |
e461edb1d98f |
def _filter_only(self, selector, elements, reverse=False, unique=False):
    """Filters the selection set only, as opposed to also including
    descendants.

    ``selector`` is compiled with the ``'self::'`` prefix and evaluated
    on every item of ``elements``; ``None`` keeps ``elements`` as given.
    ``reverse`` reverses the result list and ``unique`` drops repeated
    items, keeping the first occurrence.  Returns a new instance whose
    parent is ``self``.
    """
    if selector is None:
        matched = elements
    else:
        expression = selector_to_xpath(selector, 'self::')
        matched = []
        for node in elements:
            matched.extend(node.xpath(expression))
    if reverse:
        matched.reverse()
    if unique:
        seen = []
        for node in matched:
            if node not in seen:
                seen.append(node)
        matched = seen
    return self.__class__(matched, parent=self)
222 |
e461edb1d98f |
|
223 |
e461edb1d98f |
def parent(self, selector=None):
    """Return the distinct parents of the current nodes, optionally
    restricted to those matching ``selector``."""
    found = []
    for node in self:
        above = node.getparent()
        if above is not None:
            found.append(above)
    return self._filter_only(selector, found, unique=True)
225 |
e461edb1d98f |
|
226 |
e461edb1d98f |
def prev(self, selector=None):
    """Return the immediately preceding sibling of each current node,
    optionally restricted to those matching ``selector``."""
    found = []
    for node in self:
        before = node.getprevious()
        if before is not None:
            found.append(before)
    return self._filter_only(selector, found)
228 |
e461edb1d98f |
|
229 |
e461edb1d98f |
def next(self, selector=None):
    """Return the immediately following sibling of each current node,
    optionally restricted to those matching ``selector``."""
    found = []
    for node in self:
        after = node.getnext()
        if after is not None:
            found.append(after)
    return self._filter_only(selector, found)
231 |
e461edb1d98f |
|
232 |
e461edb1d98f |
def _traverse(self, method):
    """Yield every node reached from each current node by calling the
    node method named ``method`` repeatedly until it returns ``None``."""
    for start in self:
        node = getattr(start, method)()
        while node is not None:
            yield node
            node = getattr(node, method)()
238 |
e461edb1d98f |
|
239 |
e461edb1d98f |
def _traverse_parent_topdown(self):
    """Yield the ancestors of each current node, outermost ancestor
    first."""
    for node in self:
        chain = []
        ancestor = node.getparent()
        while ancestor is not None:
            chain.append(ancestor)
            ancestor = ancestor.getparent()
        # The chain was collected bottom-up; emit it top-down.
        for ancestor in reversed(chain):
            yield ancestor
249 |
e461edb1d98f |
|
250 |
e461edb1d98f |
def _nextAll(self):
    """Return the list of all following siblings of the current nodes."""
    return list(self._traverse('getnext'))
252 |
e461edb1d98f |
|
253 |
e461edb1d98f |
def nextAll(self, selector=None):
    """Return all following siblings, optionally filtered by ``selector``.

    >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>')
    >>> d('p:last').nextAll()
    [<img>]
    """
    following = self._nextAll()
    return self._filter_only(selector, following)
260 |
e461edb1d98f |
|
261 |
e461edb1d98f |
def _prevAll(self):
    """Return the list of all preceding siblings of the current nodes,
    nearest sibling first."""
    return list(self._traverse('getprevious'))
263 |
e461edb1d98f |
|
264 |
e461edb1d98f |
def prevAll(self, selector=None):
    """Return all preceding siblings, optionally filtered by ``selector``.

    >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>')
    >>> d('p:last').prevAll()
    [<p.hello>]
    """
    preceding = self._prevAll()
    return self._filter_only(selector, preceding, reverse=True)
271 |
e461edb1d98f |
|
272 |
e461edb1d98f |
def siblings(self, selector=None):
    """Return preceding then following siblings, optionally filtered.

    >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>')
    >>> d('.hello').siblings()
    [<p>, <img>]
    >>> d('.hello').siblings('img')
    [<img>]
    """
    around = self._prevAll() + self._nextAll()
    return self._filter_only(selector, around)
281 |
e461edb1d98f |
|
282 |
e461edb1d98f |
def parents(self, selector=None):
    """Return the distinct ancestors of the current nodes, optionally
    filtered by ``selector``.

    >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p></span>')
    >>> d('p').parents()
    [<span>]
    >>> d('.hello').parents('span')
    [<span>]
    >>> d('.hello').parents('p')
    []
    """
    ancestors = list(self._traverse_parent_topdown())
    return self._filter_only(selector, ancestors, unique=True)
297 |
e461edb1d98f |
|
298 |
e461edb1d98f |
def children(self, selector=None):
    """Filter elements that are direct children of self using optional selector.

    >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p></span>')
    >>> d
    [<span>]
    >>> d.children()
    [<p.hello>, <p>]
    >>> d.children('.hello')
    [<p.hello>]
    """
    direct = []
    for tag in self:
        direct.extend(tag.getchildren())
    return self._filter_only(selector, direct)
311 |
e461edb1d98f |
|
312 |
dd7499560960 |
def filter(self, selector):
    """Filter elements in self using selector (string or function).

    A callable ``selector`` is called with the index of each element and
    the element is kept when the result is true.  While it runs, the
    current element is also published as the global name ``this`` in
    the callable's module; the name is removed again afterwards.

    >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p>')
    >>> d('p')
    [<p.hello>, <p>]
    >>> d('p').filter('.hello')
    [<p.hello>]
    >>> d('p').filter(lambda i: i == 1)
    [<p>]
    >>> d('p').filter(lambda i: PyQuery(this).text() == 'Hi')
    [<p.hello>]
    """
    if not callable(selector):
        return self._filter_only(selector, self)

    # Globals of the callable; None for callables that are not plain
    # functions, in which case ``this`` is simply not published.
    namespace = getattr(selector, 'func_globals', None)
    if namespace is None:
        namespace = getattr(selector, '__globals__', None)

    elements = []
    try:
        for i, this in enumerate(self):
            if namespace is not None:
                namespace['this'] = this
            if selector(i):
                elements.append(this)
    finally:
        # ``pop`` with a default: on an empty selection ``this`` was
        # never set, and a plain ``del`` raised KeyError.
        if namespace is not None:
            namespace.pop('this', None)
    return self.__class__(elements, **dict(parent=self))
337 |
5c01388c8376 |
|
338 |
5c01388c8376 |
def not_(self, selector):
    """Return elements that don't match the given selector.

    >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
    >>> d('p').not_('.hello')
    [<p>]
    """
    cls = self.__class__
    rejected = set(cls(selector, self))
    kept = []
    for node in self:
        if node not in rejected:
            kept.append(node)
    return cls(kept, parent=self)
347 |
5c01388c8376 |
|
348 |
5c01388c8376 |
def is_(self, selector):
    """Returns True if selector matches at least one current element, else False.

    >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
    >>> d('p').eq(0).is_('.hello')
    True
    >>> d('p').eq(1).is_('.hello')
    False
    """
    matched = self.__class__(selector, self)
    if matched:
        return True
    return False
357 |
5c01388c8376 |
|
358 |
dd7499560960 |
def find(self, selector):
    """Find elements using selector traversing down from self.

    >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
    >>> d = PyQuery(m)
    >>> d('p').find('em')
    [<em>, <em>]
    >>> d('p').eq(1).find('em')
    [<em>]
    """
    expression = selector_to_xpath(selector)
    matches = []
    # The search starts from the children, so the current nodes
    # themselves are never evaluated.
    for tag in self:
        for child in tag.getchildren():
            matches.extend(child.xpath(expression))
    return self.__class__(matches, parent=self)
375 |
dd7499560960 |
|
376 |
5c01388c8376 |
def eq(self, index):
    """Return PyQuery of only the element with the provided index.

    Negative indexes count from the end.  An out of bounds index gives
    an empty result instead of raising.

    >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
    >>> d('p').eq(0)
    [<p.hello>]
    >>> d('p').eq(1)
    [<p>]
    >>> d('p').eq(2)
    []
    >>> d('p').eq(-1)
    [<p>]
    """
    # A slice such as self[index:index+1] is empty for index == -1
    # (it becomes self[-1:0]), so index directly and handle the
    # out of bounds case explicitly.
    try:
        items = [self[index]]
    except IndexError:
        items = []
    return self.__class__(items, **dict(parent=self))
390 |
5c01388c8376 |
|
391 |
0387197e63da |
def each(self, func):
    """apply func on each nodes

    ``func`` receives every node wrapped in a one-item instance of this
    class.  Returns ``self``.
    """
    wrap = self.__class__
    for node in self:
        func(wrap([node]))
    return self
397 |
0387197e63da |
|
398 |
4cd95721e68f |
def map(self, func):
    """Returns a new PyQuery after transforming current items with func.

    func should take two arguments - 'index' and 'element'.  Elements can
    also be referred to as 'this' inside of func.

    ``None`` results are dropped and list results are flattened into
    the output.

    >>> d = PyQuery('<p class="hello">Hi there</p><p>Bye</p><br />')
    >>> d('p').map(lambda i, e: PyQuery(e).text())
    ['Hi there', 'Bye']

    >>> d('p').map(lambda i, e: len(PyQuery(this).text()))
    [8, 3]

    >>> d('p').map(lambda i, e: PyQuery(this).text().split())
    ['Hi', 'there', 'Bye']
    """
    # Globals of the callable; None for callables that are not plain
    # functions, in which case ``this`` is simply not published.
    namespace = getattr(func, 'func_globals', None)
    if namespace is None:
        namespace = getattr(func, '__globals__', None)

    items = []
    try:
        for i, element in enumerate(self):
            if namespace is not None:
                namespace['this'] = element
            result = func(i, element)
            if result is not None:
                if not isinstance(result, list):
                    items.append(result)
                else:
                    items.extend(result)
    finally:
        # ``pop`` with a default: on an empty selection ``this`` was
        # never set, and a plain ``del`` raised KeyError.
        if namespace is not None:
            namespace.pop('this', None)
    return self.__class__(items, **dict(parent=self))
427 |
4cd95721e68f |
|
428 |
0387197e63da |
@property
def length(self):
    """Number of elements in the selection (read-only property)."""
    count = len(self)
    return count
431 |
0387197e63da |
|
432 |
817e3b5c8b58 |
def size(self):
    """Return the number of elements in the selection."""
    count = len(self)
    return count
434 |
47c1c48137ad |
|
435 |
dd7499560960 |
def end(self):
    """Break out of a level of traversal and return to the parent level.

    Returns whatever was stored as the parent when this object was
    created.

    >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
    >>> d = PyQuery(m)
    >>> d('p').eq(1).find('em').end().end()
    [<p>, <p>]
    """
    previous = self._parent
    return previous
444 |
dd7499560960 |
|
445 |
0387197e63da |
############## |
446 |
b195c88d1d6a |
# Attributes # |
447 |
b195c88d1d6a |
############## |
448 |
0e6d225da7b3 |
def attr(self, *args, **kwargs):
    """Attributes manipulation

    * ``attr(name)`` -- return the attribute of the first element.
    * ``attr(name, value)`` -- set it on every element; ``None`` or
      ``''`` removes the attribute instead.
    * ``attr(**attributes)`` -- set several attributes on every element.

    ``class_`` and ``for_`` are accepted as spellings of ``class`` and
    ``for``.  Returns ``None`` on an empty selection.  Raises
    ``ValueError`` when no usable arguments are given.
    """
    mapping = {'class_': 'class', 'for_': 'for'}

    attr = value = no_default
    count = len(args)
    if count == 1:
        attr = mapping.get(args[0], args[0])
    elif count == 2:
        attr, value = args
        attr = mapping.get(attr, attr)
    elif kwargs:
        attr = {}
        for name, item in kwargs.items():
            attr[mapping.get(name, name)] = item
    else:
        raise ValueError('Invalid arguments %s %s' % (args, kwargs))

    if not self:
        return None
    if isinstance(attr, dict):
        for tag in self:
            for name, item in attr.items():
                tag.set(name, item)
        return self
    if value is no_default:
        # getter: only the first element is consulted
        return self[0].get(attr)
    if value is None or value == '':
        return self.removeAttr(attr)
    for tag in self:
        tag.set(attr, value)
    return self
483 |
b195c88d1d6a |
|
484 |
b195c88d1d6a |
def removeAttr(self, name):
    """Remove an attribute::

        >>> d = PyQuery('<div id="myid"></div>')
        >>> d.removeAttr('id')
        [<div>]

    Elements that do not carry the attribute are left untouched.
    Returns ``self``.
    """
    for tag in self:
        try:
            del tag.attrib[name]
        except KeyError:
            # Not every selected element has the attribute; removing a
            # missing one is a no-op rather than an error.
            pass
    return self
495 |
b195c88d1d6a |
|
496 |
85b9e5c80e08 |
attr = FlexibleElement(pget=attr, pdel=removeAttr) |
497 |
18a6f65ac437 |
|
498 |
b195c88d1d6a |
####### |
499 |
b195c88d1d6a |
# CSS # |
500 |
b195c88d1d6a |
####### |
501 |
85b9e5c80e08 |
def height(self, value=no_default):
    """set/get height of element

    Shortcut for ``attr('height', value)``.
    """
    name = 'height'
    return self.attr(name, value)
505 |
b195c88d1d6a |
|
506 |
85b9e5c80e08 |
def width(self, value=no_default):
    """set/get width of element

    Shortcut for ``attr('width', value)``.
    """
    name = 'width'
    return self.attr(name, value)
510 |
b195c88d1d6a |
|
511 |
a2c59102bd0a |
def hasClass(self, name):
    """Return True if element has class::

        >>> d = PyQuery('<div class="myclass"></div>')
        >>> d.hasClass('myclass')
        True

    """
    selector = '.%s' % name
    return self.is_(selector)
520 |
a2c59102bd0a |
|
521 |
b195c88d1d6a |
def addClass(self, value):
    """Add a css class to elements::

        >>> d = PyQuery('<div></div>')
        >>> d.addClass('myclass')
        [<div.myclass>]

    ``value`` may hold several space separated class names.  Existing
    classes keep their order and new ones are appended in the order
    given; duplicates are dropped.  Returns ``self``.
    """
    # Empty strings come from consecutive spaces in ``value``.
    requested = [name for name in value.split(' ') if name]
    for tag in self:
        # An ordered list instead of a set, so that the attribute is
        # not rewritten in an arbitrary order.
        classes = []
        for name in (tag.get('class') or '').split() + requested:
            if name not in classes:
                classes.append(name)
        tag.set('class', ' '.join(classes))
    return self
536 |
b195c88d1d6a |
|
537 |
b195c88d1d6a |
def removeClass(self, value):
    """Remove a css class to elements

        >>> d = PyQuery('<div class="myclass"></div>')
        >>> d.removeClass('myclass')
        [<div>]

    ``value`` may hold several space separated class names.  The
    remaining classes keep their original order; duplicates are
    dropped.  Returns ``self``.
    """
    unwanted = set(value.split(' '))
    for tag in self:
        # An ordered list instead of a set, so that the attribute is
        # not rewritten in an arbitrary order.
        kept = []
        for name in (tag.get('class') or '').split():
            if name not in unwanted and name not in kept:
                kept.append(name)
        tag.set('class', ' '.join(kept))
    return self
552 |
b195c88d1d6a |
|
553 |
b195c88d1d6a |
def toggleClass(self, value):
    """Toggle a css class to elements

        >>> d = PyQuery('<div></div>')
        >>> d.toggleClass('myclass')
        [<div.myclass>]

    ``value`` may hold several space separated class names.  Each name
    is removed from the elements that have it and added to those that
    do not.  Untouched classes keep their order and added ones are
    appended in the order given.  Returns ``self``.
    """
    # Ordered, de-duplicated names; empty strings come from
    # consecutive spaces in ``value``.
    requested = []
    for name in value.split(' '):
        if name and name not in requested:
            requested.append(name)

    for tag in self:
        current = []
        for name in (tag.get('class') or '').split():
            if name not in current:
                current.append(name)
        kept = [name for name in current if name not in requested]
        added = [name for name in requested if name not in current]
        tag.set('class', ' '.join(kept + added))
    return self
570 |
b195c88d1d6a |
|
571 |
0e6d225da7b3 |
def css(self, *args, **kwargs):
    """css attributes manipulation

    * ``css(name, value)`` -- set one property in the ``style``
      attribute of every element (only when ``value`` is a string).
    * ``css(mapping)`` or ``css(**properties)`` -- set several.

    Underscores in property names are written as dashes.  An existing
    declaration of the same property is replaced.  Returns ``self``.
    Raises ``ValueError`` when no usable arguments are given.
    """
    attr = value = no_default
    count = len(args)
    if count == 1:
        attr = args[0]
    elif count == 2:
        attr, value = args
    elif kwargs:
        attr = kwargs
    else:
        raise ValueError('Invalid arguments %s %s' % (args, kwargs))

    if isinstance(attr, dict):
        for tag in self:
            replaced = [name.strip().replace('_', '-')
                        for name in attr.keys()]
            declarations = []
            for chunk in (tag.get('style') or '').split(';'):
                if not chunk.strip():
                    continue
                # drop declarations that are about to be rewritten
                if chunk.split(':')[0].strip() in replaced:
                    continue
                declarations.append(chunk.strip())
            for name, item in attr.items():
                declarations.append(
                    '%s: %s' % (name.replace('_', '-'), item))
            tag.set('style', '; '.join(declarations))
    elif isinstance(value, basestring):
        attr = attr.replace('_', '-')
        for tag in self:
            declarations = []
            for chunk in (tag.get('style') or '').split(';'):
                if not chunk.strip():
                    continue
                if chunk.split(':')[0].strip() == attr.strip():
                    continue
                declarations.append(chunk.strip())
            declarations.append('%s: %s' % (attr, value))
            tag.set('style', '; '.join(declarations))
    return self
608 |
b195c88d1d6a |
|
609 |
85b9e5c80e08 |
css = FlexibleElement(pget=css, pset=css) |
610 |
480986cd29f2 |
|
611 |
b195c88d1d6a |
################### |
612 |
b195c88d1d6a |
# CORE UI EFFECTS # |
613 |
b195c88d1d6a |
################### |
614 |
b195c88d1d6a |
def hide(self):
    """add display:none to elements style

    Shortcut for ``css('display', 'none')``.
    """
    prop, setting = 'display', 'none'
    return self.css(prop, setting)
618 |
b195c88d1d6a |
|
619 |
b195c88d1d6a |
def show(self):
    """add display:block to elements style

    Shortcut for ``css('display', 'block')``.
    """
    prop, setting = 'display', 'block'
    return self.css(prop, setting)
623 |
b195c88d1d6a |
|
624 |
b195c88d1d6a |
######## |
625 |
b195c88d1d6a |
# HTML # |
626 |
b195c88d1d6a |
######## |
627 |
85b9e5c80e08 |
def val(self, value=no_default):
    """Set/get the attribute value::

        >>> d = PyQuery('<input />')
        >>> d.val('Youhou')
        [<input>]
        >>> d.val()
        'Youhou'

    Shortcut for ``attr('value', value)``.
    """
    name = 'value'
    return self.attr(name, value)
638 |
b195c88d1d6a |
|
639 |
85b9e5c80e08 |
def html(self, value=no_default):
    """Get or set the html representation of sub nodes.

    Get the text value::

        >>> doc = PyQuery('<div><span>toto</span></div>')
        >>> print doc.html()
        <span>toto</span>

    Set the text value::

        >>> doc.html('<span>Youhou !</span>')
        [<div>]
        >>> print doc
        <div><span>Youhou !</span></div>

    The getter reads the first element only and returns ``None`` on an
    empty selection.  The setter accepts a string or an instance of
    this class, replaces the content of every element and returns
    ``self``; any other value raises ``ValueError``.
    """
    if value is no_default:
        if not self:
            return None
        tag = self[0]
        children = tag.getchildren()
        if not children:
            return tag.text
        html = tag.text or ''
        html += ''.join([etree.tostring(child, encoding=unicode)
                         for child in children])
        return html

    if isinstance(value, self.__class__):
        new_html = str(value)
    elif isinstance(value, basestring):
        new_html = value
    else:
        # Previously ``new_html`` stayed unbound here and the loop
        # below failed with UnboundLocalError.
        raise ValueError('Invalid html value %r' % (value,))

    for tag in self:
        for child in tag.getchildren():
            tag.remove(child)
        root = etree.fromstring('<root>' + new_html + '</root>')
        children = root.getchildren()
        if children:
            tag.extend(children)
        # Only the content is replaced.  The tail (text following the
        # element in its parent) is not part of the content, and the
        # freshly parsed root has none, so copying it over would erase
        # the text after the element.
        tag.text = root.text
    return self
681 |
b195c88d1d6a |
|
682 |
85b9e5c80e08 |
def text(self, value=no_default):
    """Get or set the text representation of sub nodes.

    Get the text value::

        >>> doc = PyQuery('<div><span>toto</span><span>tata</span></div>')
        >>> print doc.text()
        toto tata

    Set the text value::

        >>> doc.text('Youhou !')
        [<div>]
        >>> print doc
        <div>Youhou !</div>

    Without argument, return the stripped, non-empty text chunks of all
    nodes and their descendants joined by single spaces (None when there is
    no node).  With an argument, drop the children of every node, set its
    text to ``value`` and return self.
    """
    if value is not no_default:
        # setter
        for node in self:
            for kid in node.getchildren():
                node.remove(kid)
            node.text = value
        return self

    if not self:
        return None

    chunks = []

    def collect(node, with_tail=True):
        # Depth-first walk in document order: own text, children, tail.
        if node.text:
            chunks.append(node.text)
        for kid in node.getchildren():
            collect(kid)
        if with_tail and node.tail:
            chunks.append(node.tail)

    for node in self:
        # The tail of a selected node lies outside of it, so it is skipped.
        collect(node, with_tail=False)

    stripped = [chunk.strip() for chunk in chunks]
    return ' '.join([chunk for chunk in stripped if chunk])
723 |
b195c88d1d6a |
|
724 |
b195c88d1d6a |
################ |
725 |
b195c88d1d6a |
# Manipulating # |
726 |
b195c88d1d6a |
################ |
727 |
b195c88d1d6a |
|
728 |
59706fab67c5 |
def _get_root(self, value):
    """Normalize value into a ``(root, root_text)`` pair.

    A string is parsed inside a synthetic ``<root>`` element, an lxml
    element is wrapped in an instance of this class and a PyQuery object is
    used as is.  ``root_text`` is ``root.text`` when that attribute is a
    string, else ''.  Raises TypeError for any other type of value.
    """
    if isinstance(value, basestring):
        root = etree.fromstring('<root>' + value + '</root>')
    elif isinstance(value, etree._Element):
        root = self.__class__(value)
    elif isinstance(value, PyQuery):
        root = value
    else:
        raise TypeError(
            'Value must be string, PyQuery or Element. Got %r' % value)

    # Only a parsed <root> element carries a string in .text here; anything
    # else found under that name is ignored.
    leading = getattr(root, 'text', None)
    if not isinstance(leading, basestring):
        leading = ''
    return root, leading
743 |
59706fab67c5 |
|
744 |
59706fab67c5 |
def append(self, value):
    """Append value to each node.

    ``value`` is normalized by ``_get_root`` (string, lxml element or
    PyQuery object).  Its leading text is added after the last child of
    each node, or to the node text when the node has no children, and its
    nodes are added as last children.  The first node receives the nodes
    themselves; every following node receives deep copies.  Returns self.
    """
    root, root_text = self._get_root(value)
    # Keep the nodes in a plain list: it still references them once they
    # have been moved into a target.  The former ``tag[-len(root):]``
    # re-fetch returned *all* children of the target when value held no
    # node (``tag[-0:]``), which were then copied into the next targets.
    nodes = list(root)
    for i, tag in enumerate(self):
        if len(tag) > 0:  # if the tag has children
            last_child = tag[-1]
            last_child.tail = (last_child.tail or '') + root_text
        else:
            tag.text = (tag.text or '') + root_text
        if not nodes:
            continue
        if i > 0:
            nodes = deepcopy(nodes)
        tag.extend(nodes)
    return self
763 |
b195c88d1d6a |
|
764 |
b195c88d1d6a |
def appendTo(self, value):
    """Append these nodes to value.

    Delegates to ``value.append(self)`` and returns self.
    """
    target = value
    target.append(self)
    return self
769 |
b195c88d1d6a |
|
770 |
b195c88d1d6a |
def prepend(self, value):
    """Prepend value to each node.

    ``value`` is normalized by ``_get_root`` (string, lxml element or
    PyQuery object).  Its nodes become the first children of each node and
    its leading text becomes the start of the node text; the previous text
    of the node is moved after the inserted nodes.  The first node receives
    the nodes themselves; every following node receives deep copies.
    Returns self.
    """
    root, root_text = self._get_root(value)
    nodes = list(root)
    # Text that followed the last inserted node in value itself.
    trailing = ''
    if nodes:
        trailing = nodes[-1].tail or ''
    for i, tag in enumerate(self):
        if i > 0:
            # Copy *before* editing the tail: the current nodes already
            # live in the previous target and must not be modified.
            nodes = deepcopy(nodes)
        old_text = tag.text or ''
        if nodes:
            # The former text of the node now follows the inserted nodes.
            nodes[-1].tail = trailing + old_text
            tag.text = root_text
        else:
            tag.text = root_text + old_text
        tag[:0] = nodes
    return self
787 |
b195c88d1d6a |
|
788 |
b195c88d1d6a |
def prependTo(self, value):
    """Prepend these nodes to value.

    Delegates to ``value.prepend(self)`` and returns self.
    """
    target = value
    target.prepend(self)
    return self
793 |
59706fab67c5 |
|
794 |
59706fab67c5 |
def after(self, value):
    """Add value after each node.

    ``value`` is normalized by ``_get_root`` (string, lxml element or
    PyQuery object).  Its leading text is appended to the tail of each
    node (so it comes before the inserted nodes) and its nodes are inserted
    in the parent right after the node.  The first node receives the nodes
    themselves; every following node receives deep copies.  Every node must
    have a parent.  Returns self.
    """
    root, root_text = self._get_root(value)
    # Keep the nodes in a plain list instead of re-reading them from the
    # parent: the former ``parent[index:len(root)]`` slice lacked the
    # ``index +`` offset and usually came back empty, so only the first
    # node got anything inserted.
    nodes = list(root)
    for i, tag in enumerate(self):
        tag.tail = (tag.tail or '') + root_text
        if i > 0:
            nodes = deepcopy(nodes)
        parent = tag.getparent()
        index = parent.index(tag) + 1
        parent[index:index] = nodes
    return self
809 |
59706fab67c5 |
|
810 |
59706fab67c5 |
def insertAfter(self, value):
    """Insert these nodes after value.

    Delegates to ``value.after(self)`` and returns self.
    """
    target = value
    target.after(self)
    return self
815 |
59706fab67c5 |
|
816 |
59706fab67c5 |
def before(self, value):
    """Insert value before each node.

    ``value`` is normalized by ``_get_root`` (string, lxml element or
    PyQuery object).  Its leading text is appended to the tail of the
    previous sibling, or to the parent text when the node is a first child,
    and its nodes are inserted in the parent right before the node.  The
    first node receives the nodes themselves; every following node receives
    deep copies.  Every node must have a parent.  Returns self.
    """
    root, root_text = self._get_root(value)
    # Keep the nodes in a plain list instead of re-reading them from the
    # parent: the former ``parent[index:len(root)]`` slice lacked the
    # ``index +`` offset, so it was only right for a first child and later
    # nodes got nothing inserted.
    nodes = list(root)
    for i, tag in enumerate(self):
        parent = tag.getparent()
        previous = tag.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or '') + root_text
        else:
            parent.text = (parent.text or '') + root_text
        if i > 0:
            nodes = deepcopy(nodes)
        index = parent.index(tag)
        parent[index:index] = nodes
    return self
838 |
3c7ab75c2eea |
|
839 |
3c7ab75c2eea |
def insertBefore(self, value):
    """Insert these nodes before value.

    Delegates to ``value.before(self)`` and returns self.
    """
    target = value
    target.before(self)
    return self
844 |
3c7ab75c2eea |
|
845 |
56528aa564ba |
def wrap(self, value):
    """A string of HTML that will be created on the fly and wrapped around
    each target::

        >>> d = PyQuery('<span>youhou</span>')
        >>> d.wrap('<div></div>')
        [<div>]
        >>> print d
        <div><span>youhou</span></div>

    A deep copy of each node is placed in its own copy of the wrapper
    (inside the last direct child of the wrapper when it has children).
    When the node has a parent, the wrapper takes its place there.  The
    nodes of this object are replaced by the wrappers; returns self.
    """
    assert isinstance(value, basestring)
    template = fromstring(value)[0]
    wrappers = []
    for tag in self:
        wrapper = deepcopy(template)
        # The copy of the node goes into the last direct child of the
        # wrapper, or into the wrapper itself when it is childless.
        if len(wrapper):
            holder = wrapper[-1]
        else:
            holder = wrapper
        holder.append(deepcopy(tag))
        wrappers.append(wrapper)

        parent = tag.getparent()
        if parent is not None:
            # Swap the original node for its wrapper in the document.
            tag.addnext(wrapper)
            parent.remove(tag)
    self[:] = wrappers
    return self
879 |
56528aa564ba |
|
880 |
cf3908ac2b0b |
def wrapAll(self, value):
    """Wrap all the elements in the matched set into a single wrapper element::

        >>> d = PyQuery('<div><span>Hey</span><span>you !</span></div>')
        >>> print d('span').wrapAll('<div id="wrapper"></div>')
        <div id="wrapper"><span>Hey</span><span>you !</span></div>

    Deep copies of all nodes are put in one wrapper (inside its last direct
    child when it has children).  The originals are replaced in the
    document only when they all share one parent and are all of its
    children.  This object then holds the wrapper alone; returns self.
    An empty object is returned untouched.
    """
    if not self:
        return self

    assert isinstance(value, basestring)
    wrapper = deepcopy(fromstring(value)[0])
    if len(wrapper):
        holder = wrapper[-1]
    else:
        holder = wrapper

    parent = self[0].getparent()
    # Without a parent for the first node there is nothing to replace.
    same_parent = parent is not None

    # add nodes to wrapper and check parent
    for tag in self:
        holder.append(deepcopy(tag))
        if tag.getparent() is not parent:
            same_parent = False

    # replace nodes in parent if possible
    if same_parent and len(parent) == len(self):
        for tag in self:
            parent.remove(tag)
        parent.append(wrapper)

    self[:] = [wrapper]
    return self
921 |
cf3908ac2b0b |
|
922 |
47c1c48137ad |
def replaceWith(self, value):
    """Replace nodes by value.

    Inserts value before every node with ``before``, then removes each node
    from its parent.  Returns self.
    """
    self.before(value)
    for node in self:
        node.getparent().remove(node)
    return self
930 |
47c1c48137ad |
|
931 |
47c1c48137ad |
def replaceAll(self, expr):
    """Replace the nodes matching expr by these nodes.

    ``expr`` is resolved by calling the parent object of this one; the
    result is replaced with ``replaceWith(self)``.  Raises ValueError when
    this object has no parent.  Returns self.
    """
    owner = self._parent
    if owner is no_default:
        raise ValueError(
            'replaceAll can only be used with an object with parent')
    targets = owner(expr)
    targets.replaceWith(self)
    return self
939 |
47c1c48137ad |
|
940 |
3c7ab75c2eea |
def clone(self):
    """Replace the nodes of this object by deep copies of themselves.

    The object is modified in place and returned; no new object is created.
    """
    copies = []
    for tag in self:
        copies.append(deepcopy(tag))
    self[:] = copies
    return self
945 |
3c7ab75c2eea |
|
946 |
3c7ab75c2eea |
def empty(self):
    """Remove the content of nodes.

    Drops the children and the text of every node.  Returns self.
    """
    for node in self:
        node[:] = []
        node.text = None
    return self
953 |
3c7ab75c2eea |
|
954 |
85b9e5c80e08 |
def remove(self, expr=no_default):
    """Remove nodes.

    Without argument, detach every node of this object from its parent;
    nodes without parent are left alone.  The text following a removed
    node is kept in place: it is appended, after a space, to the tail of
    the previous sibling, or to the parent text when the node is a first
    child.

    With ``expr``, the nodes obtained from ``self.__class__(expr, self)``
    are removed instead.  Returns self in both cases.
    """
    if expr is no_default:
        for tag in self:
            parent = tag.getparent()
            if parent is None:
                continue
            if tag.tail:
                # Keep the tail where it was.  It used to be appended to
                # parent.text unconditionally, which moved it ahead of all
                # earlier siblings.
                previous = tag.getprevious()
                if previous is None:
                    parent.text = (parent.text or '') + ' ' + tag.tail
                else:
                    previous.tail = (previous.tail or '') + ' ' + tag.tail
            parent.remove(tag)
    else:
        self.__class__(expr, self).remove()
    return self
970 |
e438752b3e14 |
|
971 |
e438752b3e14 |
##################################################### |
972 |
e438752b3e14 |
# Additional methods that are not in the jQuery API # |
973 |
e438752b3e14 |
##################################################### |
974 |
e438752b3e14 |
|
975 |
e438752b3e14 |
@property
def base_url(self):
    """Return the url of current html document or None if not available.

    Falls back on the base url of the parent object when this object has
    none of its own.
    """
    own_url = self._base_url
    if own_url is None:
        if self._parent is no_default:
            return None
        return self._parent.base_url
    return own_url
983 |
e438752b3e14 |
|
984 |
e438752b3e14 |
def make_links_absolute(self, base_url=None):
    """Make all links absolute.

    The ``href`` of every ``a`` node is joined with ``base_url``, which
    defaults to the ``base_url`` property of this object.  Nodes without
    ``href`` are left untouched.  Raises ValueError when no base url is
    available.  Returns self.
    """
    if base_url is None:
        base_url = self.base_url
        if base_url is None:
            raise ValueError('You need a base URL to make your links '
                'absolute. It can be provided by the base_url parameter.')

    def absolutize(a):
        href = a.attr('href')
        # urljoin() returns the base url for an empty url, which used to
        # add a href to anchors that never had one.
        if href is not None:
            a.attr('href', urljoin(base_url, href))

    self('a').each(absolutize)
    return self
