关于 Scrapy 中 meta={'item': item} 的传递用法

发布于 2019-09-11  222 次阅读


def parse_url(self, response):
    """Parse a listing page: request every article, then the next page.

    A partially filled item (only ``class_h`` is set here) is handed to
    ``parse_details`` through ``Request.meta``. Each request gets its own
    copy of the item: all article requests are yielded before any detail
    callback runs, so sharing one mutable item would let the callbacks
    overwrite each other's ``title``/``url``/``article`` values.

    Args:
        response: The listing-page response being parsed.

    Yields:
        One ``scrapy.Request`` per article link (callback
        ``parse_details``), followed by a request for the next listing
        page (callback ``parse_url``) when a "下一页" link exists.
    """
    print(response.url)
    # Created up front; the remaining fields are filled in parse_details.
    item = JinsamaItem()
    item['class_h'] = response.xpath('//div[@class="placenav"]/a[2]/text()').extract_first()
    datas = response.xpath('//div[@class="newslist"]/dl/dt/a/@href').extract()
    jin = response.xpath('//a[contains(text(),"下一页")]/@href').extract_first()
    for data in datas:
        if data.startswith('/'):
            url = "http://www.haha56.net" + data
        else:
            url = data
        # Pass the item along via meta. copy() assumes JinsamaItem is a
        # scrapy.Item (or dict-like) -- both provide a shallow copy().
        yield scrapy.Request(url, meta={'item': item.copy()}, callback=self.parse_details)
    if jin is not None:
        base_url = response.xpath('//div[@class="placenav"]/a[2]/@href').extract_first()
        if base_url is None:
            # No breadcrumb link to build on: resolve the next-page href
            # against the current page URL instead of crashing on None.
            next_url = response.urljoin(jin)
        elif base_url.startswith('http'):
            next_url = base_url + jin
        else:
            next_url = "http://www.haha56.net" + base_url + jin
        yield scrapy.Request(next_url, callback=self.parse_url)


def parse_details(self, response):
    """Complete the item received through ``response.meta`` and emit it.

    The item is not created here; it is taken from ``response.meta['item']``
    and gets its ``title``, ``url`` and ``article`` fields set from this
    response.

    Args:
        response: The article-page response; its ``meta`` must contain
            the key ``'item'``.

    Yields:
        The same item object, with the three fields above filled in.
    """
    # Handed over from the requesting callback, so no new item is defined.
    item = response.meta['item']

    title_nodes = response.xpath('//div[@class="newsview"]/*[@class="title"]/text()')
    content_nodes = response.css('.content')

    item['title'] = title_nodes.extract_first()
    item['url'] = response.url
    # First '.content' match as extracted by the selector (not reduced to
    # plain text via string(.)).
    item['article'] = content_nodes.extract_first()
    yield item


人間になるために生まれて、私はとても悲しいです