36 lines
893 B
Python
36 lines
893 B
Python
from scrapy import Field, Item
|
|
|
|
|
|
# Define your item pipelines here.
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting.
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
|
|
# useful for handling different item types with a single interface
|
|
|
|
|
|
class HuggingfacePipeline:
    """Pass-through pipeline stage that leaves every item untouched."""

    def process_item(self, item, spider):
        """Hand the item back unchanged.

        Args:
            item: The item passed in by the caller.
            spider: The spider associated with the item (unused here).

        Returns:
            The very same ``item`` object that was passed in.
        """
        return item
|
|
|
|
|
|
class FoundItem(Item):
    """Item with two declared fields: ``url`` and ``matches``."""

    # Stored under the 'url' key.
    url = Field()
    # Stored under the 'matches' key; FilePipeline iterates over this value
    # and writes one line per entry.
    matches = Field()
|
|
|
|
|
|
class FilePipeline:
    """Pipeline that writes every match of each item to a text file.

    The output path comes from the spider's ``filename`` attribute. When the
    attribute is missing or falsy, ``items.txt`` is used instead.
    """

    def open_spider(self, spider):
        """Open the output file for writing.

        Args:
            spider: Object whose optional ``filename`` attribute selects the
                output path.
        """
        # getattr keeps spiders that never define ``filename`` working; a
        # plain attribute access would raise AttributeError before the
        # 'items.txt' fallback could apply.
        path = getattr(spider, 'filename', None) or 'items.txt'
        # Explicit UTF-8 so non-ASCII matches do not depend on the platform's
        # default locale encoding.
        self.file = open(path, 'w', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file if it was opened.

        Args:
            spider: Unused.
        """
        # open_spider may have failed (or never run), leaving no handle.
        handle = getattr(self, 'file', None)
        if handle is not None:
            handle.close()

    def process_item(self, item, spider):
        """Write each entry of ``item['matches']`` on its own line.

        Args:
            item: Mapping-like object with a ``'matches'`` key holding an
                iterable of strings.
            spider: Unused.

        Returns:
            The same ``item``, unchanged.

        Raises:
            KeyError: If ``item`` has no ``'matches'`` key.
            TypeError: If a match is not a string.
        """
        self.file.writelines(m + '\n' for m in item['matches'])
        # Push the buffered lines to the OS after every item so the file is
        # up to date while the crawl is still running.
        self.file.flush()
        return item
|