# hf-key-scraper/huggingface/huggingface/pipelines.py

from scrapy import Field, Item


# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface


class HuggingfacePipeline:
    """No-op item pipeline: every item passes through untouched."""

    def process_item(self, item, spider):
        """Return ``item`` unchanged; ``spider`` is accepted but not used."""
        # Nothing to transform or filter here; hand the very same object back.
        return item


class FoundItem(Item):
    """Scrapy item with two declared fields: ``url`` and ``matches``."""

    # presumably the address of the page/file the matches came from -- verify against the spider
    url = Field()
    # iterated by FilePipeline.process_item, which writes each element followed by a newline
    matches = Field()


class FilePipeline:
    """Item pipeline that writes every match of each item to a text file.

    The file is opened in ``open_spider``, receives one match per line in
    ``process_item``, and is closed in ``close_spider``.
    """

    # Output path used when the spider does not provide a usable ``filename``.
    DEFAULT_FILENAME = 'items.txt'

    def open_spider(self, spider):
        """Open the output file for writing, truncating existing content.

        The path is read from ``spider.filename``. When the spider has no such
        attribute, or its value is empty/``None``, ``items.txt`` is used.
        """
        # getattr instead of plain attribute access: a spider that never set
        # ``filename`` should get the default path, not an AttributeError.
        path = getattr(spider, 'filename', None) or self.DEFAULT_FILENAME
        # Explicit encoding so non-ASCII matches do not depend on the
        # platform's locale default.
        self.file = open(path, 'w', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file opened by ``open_spider``."""
        self.file.close()

    def process_item(self, item, spider):
        """Write each entry of ``item['matches']`` on its own line.

        Returns ``item`` unchanged. Raises ``KeyError`` if the item has no
        ``'matches'`` key.
        """
        self.file.writelines(match + '\n' for match in item['matches'])
        # Flush after every item so results are not held in the Python-level
        # buffer if the crawl is interrupted.
        self.file.flush()
        return item