This repository has been archived on 2023-10-20. You can view files and clone it, but cannot push or open issues or pull requests.
hf-key-scraper/huggingface/huggingface/pipelines.py

36 lines
893 B
Python

from scrapy import Field, Item
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# NOTE: itemadapter.ItemAdapter (not imported here) is useful for handling different item types with a single interface
class HuggingfacePipeline:
    """Pass-through item pipeline: hands every item on unmodified."""

    def process_item(self, item, spider):
        """Return ``item`` exactly as received; ``spider`` is not used."""
        return item
class FoundItem(Item):
    """Scrapy item declaring two fields: ``url`` and ``matches``."""

    # Declaration order is kept as in the original.
    url = Field()
    # FilePipeline in this file iterates over this field and writes each
    # element on its own line.
    matches = Field()
class FilePipeline:
    """Item pipeline that writes every entry of ``item['matches']`` to a file.

    The output path is read from the spider's ``filename`` attribute. When
    that attribute is missing or falsy, ``items.txt`` is used instead.
    """

    _DEFAULT_FILENAME = 'items.txt'

    def open_spider(self, spider):
        """Open the output file for writing, truncating any existing content.

        Args:
            spider: The running spider; its optional ``filename`` attribute
                selects the output path.
        """
        # getattr() keeps the fallback working for spiders that never define
        # ``filename``; a plain attribute access would raise AttributeError.
        path = getattr(spider, 'filename', None) or self._DEFAULT_FILENAME
        # Explicit encoding so the output does not depend on the platform
        # locale.
        self.file = open(path, 'w', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file if it was opened."""
        # open_spider may not have run (or may have failed), in which case
        # there is nothing to close.
        handle = getattr(self, 'file', None)
        if handle is not None:
            handle.close()

    def process_item(self, item, spider):
        """Write each match on its own line and return the item unchanged.

        Args:
            item: Mapping-like object with a ``'matches'`` key holding an
                iterable of strings.
            spider: The running spider (unused).

        Returns:
            The same ``item`` object, so later pipelines can process it.

        Raises:
            KeyError: If ``item`` has no ``'matches'`` key.
        """
        for match in item['matches']:
            self.file.write(match + '\n')
        # Flush once per item (not per match) so the data reaches the disk
        # promptly without a flush for every single line.
        self.file.flush()
        return item