from scrapy import Field, Item

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

# useful for handling different item types with a single interface


class HuggingfacePipeline:
    """No-op pipeline: passes every item through unchanged."""

    def process_item(self, item, spider):
        return item


class FoundItem(Item):
    """Scraped result: the page URL and the matches found on that page."""

    url = Field()
    matches = Field()


class FilePipeline:
    """Writes each item's matches to a text file, one match per line."""

    def open_spider(self, spider):
        # getattr guards against spiders that define no `filename`
        # attribute at all (plain attribute access would raise
        # AttributeError); `or` keeps the original fallback for
        # spiders where it is set but falsy.
        filename = getattr(spider, 'filename', None) or 'items.txt'
        # Explicit UTF-8: scraped text must not depend on the
        # platform's locale encoding.
        self.file = open(filename, 'w', encoding='utf-8')

    def close_spider(self, spider):
        self.file.close()

    def process_item(self, item, spider):
        for m in item['matches']:
            self.file.write(m + '\n')
        self.file.flush()  # Ensure the data is written to the disk immediately
        return item