From 171f1755835f4a84769f6c9d65189cebd224e503 Mon Sep 17 00:00:00 2001
From: Andreas Kempe <kempe@lysator.liu.se>
Date: Fri, 12 Mar 2021 00:32:05 +0100
Subject: [PATCH] Add basic 4chan importer

This adds a basic 4chan importer that will import all threads from a
board. It creates the OP as a new post and then puts all the replies in
the corresponding thread as replies to the OP.
---
 fourchan2nntp.py     |  24 ++++++
 rss2nntp/fourchan.py | 153 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 177 insertions(+)
 create mode 100755 fourchan2nntp.py
 create mode 100644 rss2nntp/fourchan.py

diff --git a/fourchan2nntp.py b/fourchan2nntp.py
new file mode 100755
index 0000000..e1cee56
--- /dev/null
+++ b/fourchan2nntp.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Poll 4chan's /a/ board and import it into lysator.import.4chan.a."""
+
+from time import sleep
+from getpass import getpass
+
+import nntplib
+from rss2nntp.fourchan import FourChan
+
+if __name__ == '__main__':
+    password = getpass('Password:')
+
+    while True:
+        # A new importer, and with it a new NNTP connection, is set up for
+        # every round and closed again before the ten minute pause.
+        chan = FourChan('lysator.import.4chan.a',
+                        'kempe@lysator.liu.se', 'a', 'fourchan.sqlite3',
+                        'kempe', password, 'nyheter.lysator.liu.se')
+        try:
+            chan.parse()
+        finally:
+            chan.close()
+
+        sleep(600)
diff --git a/rss2nntp/fourchan.py b/rss2nntp/fourchan.py
new file mode 100644
index 0000000..218faaa
--- /dev/null
+++ b/rss2nntp/fourchan.py
@@ -0,0 +1,153 @@
+"""Import the threads of a 4chan board into an NNTP newsgroup."""
+
+import logging
+import nntplib
+import basc_py4chan
+from .database import Database
+
+
+logger = logging.getLogger(__name__)
+
+# Article posted for the opening post (OP) of a thread.
+op_template = '''From: {sender}
+Newsgroups: {group}
+Subject: {subject}
+Message-ID: {message_id}
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+{body}
+
+'''
+
+# Article posted for a reply; References points at the OP's article.
+reply_template = '''From: {sender}
+Newsgroups: {group}
+Subject: {subject}
+Message-ID: {message_id}
+References: {op_id}
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+{body}
+
+'''
+
+
+class FourChan:
+    """Mirror one 4chan board to one newsgroup.
+
+    The numbers of imported posts are recorded in the database so that
+    later runs only post what is new.
+    """
+
+    def __init__(self, group, email, board_name, db, user, pw, server):
+        """Open the database and log in to the NNTP server.
+
+        group      -- newsgroup the articles are posted to
+        email      -- address put in the From header of every article
+        board_name -- short name of the board, e.g. 'a'
+        db         -- handed to Database unchanged
+        user, pw   -- credentials for the NNTP server
+        server     -- host name of the NNTP server (SSL connection)
+        """
+        self._group = group
+        self._email = email
+        self._board_name = board_name
+        self._db = Database(db)
+        self._nntp_client = nntplib.NNTP_SSL(server, user=user, password=pw)
+
+    def close(self):
+        """Send QUIT to the NNTP server and close the connection."""
+        self._nntp_client.quit()
+
+    def _get_subject(self, post):
+        """Return the Subject header for post.
+
+        The post's own subject is used when it has one, otherwise the
+        start of its text (cut off after 40 characters). Newlines become
+        spaces and the post number is put in front.
+        """
+        if post.subject is not None:
+            subject = post.subject
+        elif len(post.text_comment) < 40:
+            subject = post.text_comment
+        else:
+            subject = post.text_comment[0:40] + '...'
+
+        subject = f'No.{post.number}: ' + subject.replace('\n', ' ')
+
+        return subject
+
+    def _get_from(self, post):
+        """Return the From header: the poster's name and our address."""
+        # post.name can be None; use 4chan's default name instead of
+        # letting the literal text 'None' end up in the header.
+        name = post.name or 'Anonymous'
+        return f'{name} <{self._email}>'
+
+    def _get_message_id(self, post):
+        """Return the Message-ID of the article made from post."""
+        return f'<{post.number}@boards.4chan.org/{self._board_name}/>'
+
+    def _post(self, post, thread_id):
+        """Post an article and record it as imported.
+
+        post      -- complete article text, headers included
+        thread_id -- number of the 4chan post the article was made from
+
+        An article the server rejects as a duplicate is recorded too,
+        since it is already there. Any other temporary error is logged
+        and the post stays unrecorded, so a later run tries it again.
+        """
+        try:
+            self._nntp_client.post(post.encode('utf-8'))
+        except nntplib.NNTPTemporaryError as e:
+            if not e.response.endswith('Duplicate'):
+                logger.warning('Could not post No.%s: %s',
+                               thread_id, e.response)
+                return
+
+        self._db.insert(thread_id)
+
+    def parse(self):
+        """Import everything on the board that is not imported yet.
+
+        The OP of a thread is posted as a new article and every other
+        post of the thread as a reply to it.
+        """
+        board = basc_py4chan.board(self._board_name, https=True)
+        for thread_id in board.get_all_thread_ids():
+            thread = board.get_thread(thread_id)
+            if thread is None:
+                # get_thread() gives None for a thread that no longer
+                # exists, e.g. one pruned after the IDs were listed.
+                continue
+
+            topic = thread.topic
+            op_id = self._get_message_id(topic)
+
+            if not self._db.is_read(thread_id):
+                op = op_template.format(sender=self._get_from(topic),
+                                        group=self._group,
+                                        subject=self._get_subject(topic),
+                                        message_id=op_id,
+                                        body=topic.text_comment)
+                self._post(op, thread_id)
+
+            for post in thread.all_posts:
+                # all_posts includes the OP, which must never be posted a
+                # second time as a reply to itself.
+                if post.number == topic.number:
+                    continue
+                if self._db.is_read(post.number):
+                    continue
+
+                reply = reply_template.format(
+                    sender=self._get_from(post),
+                    group=self._group,
+                    subject=self._get_subject(post),
+                    message_id=self._get_message_id(post),
+                    op_id=op_id,
+                    body=post.text_comment)
+                self._post(reply, post.number)
-- 
GitLab