diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | README.md | 22 | ||||
-rwxr-xr-x | Silicium/Exceptions.py | 13 | ||||
-rwxr-xr-x | Silicium/Forum.py | 248 | ||||
-rwxr-xr-x | Silicium/Topic.py | 38 | ||||
-rwxr-xr-x | Silicium/User.py | 99 | ||||
-rwxr-xr-x | Silicium/__init__.py | 8 | ||||
-rwxr-xr-x | Silicium/utils.py | 25 | ||||
-rwxr-xr-x | SiliciumCache/__init__.py | 57 | ||||
-rwxr-xr-x | main.py | 23 | ||||
-rw-r--r-- | requirements.txt | 3 | ||||
-rw-r--r-- | scripts/silicium-bot.service | 14 |
12 files changed, 552 insertions, 0 deletions
# ---- .gitignore (new file, mode 100644) ----
# __pycache__
# /config.p
#
# ---- README.md (new file, mode 100644) ----
# # Silicium Bot for Mastodon
# This is the source code of the Silicium Bot for Mastodon. You can find the
# "official" account for this bot at [silicium@oldbytes.space][silly].
#
# The objective of this bot is to find new topics on the
# [Silicium.org forum][foufouleforum], and make a reference to it on the
# Mastodon account (something an RSS reader should be able to do).
# It should have a low footprint on the
# [Silicium.org forum server][foufouleforum] (usage of a cache, only update
# when new things are announced on the category or categories in the chain to
# the homepage).
#
# The [Silicium.org forum][foufouleforum] doesn't have any API for machines,
# only humans (using HTML), so this bot scrapes the content. Also, most of
# the content is only accessible while connected, so you'll need a
# [Silicium.org forum][foufouleforum] account to run this bot.
#
# This bot is run as a service (not a cron). If you're using **systemd**,
# see the `scripts/silicium-bot.service` file for systemd.
#
# [silly]: https://oldbytes.space/@silicium
# [foufouleforum]: http://www.silicium.org/forum/index.php
#
# ---- Silicium/Exceptions.py (new file, mode 100755) ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Exceptions raised by the Silicium forum scraper."""


class SiliciumError(Exception):
    """Common base class of the errors raised by this package.

    Catching it catches both errors below; each of them is still an
    `Exception`, so existing `except` clauses keep working.
    """


class FailedAuthError(SiliciumError):
    """The forum did not hand out the session cookies that were expected."""

    def __init__(self):
        super().__init__("Failed auth.")


class NotEnoughPermissionsError(SiliciumError):
    """The requested page is not readable with the credentials in use."""

    def __init__(self):
        super().__init__("Not enough permissions!")

# End of file.
# ---- Silicium/Forum.py (new file, mode 100755) ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scraper for the forum index and for the forum (category) pages.

Pages are downloaded with `requests` and parsed with BeautifulSoup using
the "html5lib" parser, so the `html5lib` package has to be installed next
to `bs4`.
"""

import requests as _requests
import urllib as _urllib
# `import urllib` alone does not load the `parse` submodule; import it
# explicitly instead of relying on another package having done so.
import urllib.parse as _urllib_parse
import datetime as _datetime
import collections as _collections

from bs4 import BeautifulSoup as _BeautifulSoup
from bs4.element import NavigableString as _NavigableString

from .User import *
from .Topic import *
from .utils import *


def _query_args(href):
    """Return the query string of `href` as a dict of lists of values."""
    return _urllib_parse.parse_qs(_urllib_parse.urlparse(href).query)


class Forum:
    """A forum of the board. The id 0 stands for the index page.

    `title`, `forums` and `announcements` are filled lazily by the first
    call to `get_title()` or `get_forums()`; `updated` and `updater` are
    only set through `load()`.
    """

    # Step between the `start` offsets of two consecutive pages. The
    # original code hard-codes 50 everywhere it walks the pagination.
    _PAGE_SIZE = 50

    def __init__(self, forum_id, base = sili_base):
        self.id = forum_id
        self.__base = base
        self.title = None
        self.forums = None
        self.announcements = None
        self.updated = None
        self.updater = None

    def load(self, title, updater, updated):
        """Store data about this forum scraped from its parent's page."""
        self.title = title
        self.updater = updater
        self.updated = updated

    def __user_from(self, tag):
        """Build the `User` described by `tag`.

        When `tag` is a link, the user id is read from the `u` argument of
        its target; otherwise the id is -1. The name is the text of `tag`.
        """
        if tag.name == 'a':
            uid = int(_query_args(tag['href'])['u'][0])
        else:
            uid = -1
        user = User(uid, base = self.__base)
        user.name = tag.text
        return user

    def __topic_from(self, el):
        """Build a `Topic` from a row yielded by `__loadlist()`."""
        topic = Topic(el['topic_id'], base = self.__base)
        topic.load(el['title'], el['poster'], el['posted'],
            el['updater'], el['updated'])
        return topic

    def __loadlist(self, element):
        """Yield one dict per row of the topic or forum list in `element`.

        Every dict has 'title', 'updater' and 'updated' (the last two are
        None when the row shows no last post), plus 'topic_id' and/or
        'forum_id' depending on the arguments of the row's link. Rows of a
        topic list also have 'poster' and 'posted'.
        """
        ul = element.find(True, {'class': ['topiclist topics',
            'topiclist forums']}, recursive=True)
        if ul is None:
            # No list in this container: nothing to yield.
            return
        is_topic_list = 'topics' in ul['class']

        for li in ul.find_all('li', {'class': 'row'}, recursive=False):
            ans = {}

            # Get the title.
            title = li.find('a', {'class': ['topictitle', 'forumtitle']},
                recursive=True)
            ans['title'] = title.text

            # Load the link.
            arg = _query_args(title['href'])
            if 't' in arg:
                ans['topic_id'] = int(arg['t'][0])
            if 'f' in arg:
                ans['forum_id'] = int(arg['f'][0])

            if is_topic_list:
                # Get the poster ID and date.
                tab = list(li.find('div', {'class': 'topic-poster'},
                    recursive=True).children)
                poster = self.__user_from(tab[1])

                # Drop the trailing nodes until the plain text node that
                # holds the date, which follows a '»'. The type test has
                # to come first: `find` only is `str.find` on text nodes.
                while type(tab[-1]) != _NavigableString \
                or tab[-1].find('»') < 0:
                    tab = tab[:-1]
                ans['poster'] = poster
                ans['posted'] = decode_date(tab[-1].split('»')[1])

            # Get the updater ID and date.
            tab = list(li.find(True, {'class': 'lastpost'},
                recursive=True).find('span').children)
            if len(tab) < 6:
                ans['updater'] = None
                ans['updated'] = None
            else:
                ans['updater'] = self.__user_from(tab[-6])
                ans['updated'] = decode_date(tab[-1].strip())

            yield ans

    def __last_page_start(self, body):
        """Return the `start` offset of the last page listed in `body`.

        0 is returned when the page has no pagination list.
        """
        pagination = body.find('div', {'class': 'pagination'},
            recursive=True)
        ul = pagination.find('ul') if pagination is not None else None
        if ul is None:
            return 0

        # Only the last two entries matter: the last one is skipped when
        # it is an 'arrow' button.
        buttons = _collections.deque(ul.find_all('li'), 2)
        if not buttons:
            return 0
        # `.get()` because an entry may carry no class at all.
        if len(buttons) == 1 or 'arrow' in buttons[1].get('class', []):
            button = buttons[0]
        else:
            button = buttons[1]

        link = button.find('a')
        if link is not None:
            arg = _query_args(link['href'])
            return int(arg['start'][0]) if 'start' in arg else 0
        # No link: the entry only holds the page number in a <span>.
        return int(button.find('span').text) * self._PAGE_SIZE \
            - self._PAGE_SIZE

    def __loadpage(self, start=0, auth = DefaultAuth):
        """Download and parse one page of this forum.

        `start` is the offset of the first topic wanted (ignored for the
        index page, whose URL takes no offset) and `auth` provides the
        cookies sent with the request.

        Returns a dict with 'title', 'forums' (list of `Forum`),
        'announcements' and 'topics' (lists of `Topic`). 'topics' is empty
        when `start` is past the end of the forum.

        Raises `NotEnoughPermissionsError` when the page body contains a
        <strong> element or a login form.
        """
        print("[p] Loading entries starting from {}".format(start))

        url = self.__base
        if self.id == 0:
            url += '/index.php'
        else:
            url += '/viewforum.php?f={}&start={}'.format(self.id, start)

        text = _requests.get(url, cookies=auth.cookies()).text
        tree = _BeautifulSoup(text, "html5lib")
        body = tree.body.find(id='page-body', recursive=True)

        # Check if authentication is required.
        if body.find('strong') \
        or body.find('form', {'id': 'login'}, recursive=True):
            raise NotEnoughPermissionsError

        # Prepare the answer, find the name.
        ans = {'topics': []}
        if self.id == 0:
            ans['title'] = "My Silicium"
        else:
            ans['title'] = next(tree.body.find(True,
                {'class': 'forum-title'}, recursive=True).children).text

        # Find the forums.
        forums = []
        for raw in body.find_all(True, {'class': 'forabg'}):
            for el in self.__loadlist(raw):
                forum = Forum(el['forum_id'], base = self.__base)
                forum.load(el['title'], el['updater'], el['updated'])
                forums.append(forum)
        ans['forums'] = forums

        # Find the announcements (first matching container only).
        announcements = []
        for raw in body.find_all(True, {'class': 'forumbg announcement'}):
            announcements.extend(self.__topic_from(el)
                for el in self.__loadlist(raw))
            break
        ans['announcements'] = announcements

        # Find the last page.
        lastpage = self.__last_page_start(body)

        # Supplementary checks:
        # If `start` is above the maximum possible start value for the
        # topic, it will display the last page as if nothing happened.
        # So we need to check a little more, as PHPBB3 won't do it
        # for us :(
        if start >= lastpage + self._PAGE_SIZE:
            return ans
        if start > lastpage:
            sub_ans = self.__loadpage(lastpage, auth)
            # Compare against the number of topics on the last page (the
            # original measured the result dict itself by mistake).
            if start >= lastpage + len(sub_ans['topics']):
                return ans
            ans['topics'] = sub_ans['topics'][start - lastpage:]
            return ans

        # Find the topics (first container that is not the announcements).
        topics = []
        for raw in body.find_all(True, {'class': 'forumbg'}):
            if 'announcement' in raw['class']:
                continue
            topics.extend(self.__topic_from(el)
                for el in self.__loadlist(raw))
            break
        ans['topics'] = topics

        return ans

    def __getmain(self, auth = DefaultAuth):
        """Load the title, subforums and announcements once."""
        if self.forums is not None:
            return

        ans = self.__loadpage(auth = auth)
        self.title = ans['title']
        self.forums = ans['forums']
        self.announcements = ans['announcements']

    def get_title(self, auth = DefaultAuth):
        """Return the title of the forum, loading its page if needed."""
        self.__getmain(auth)
        return self.title

    def get_forums(self, auth = DefaultAuth):
        """Return the subforums as `Forum` objects, loading if needed."""
        self.__getmain(auth)
        return self.forums

    def get_topics(self, start=0, count=50, auth = DefaultAuth):
        """Return at most `count` topics, beginning at offset `start`.

        Pages are fetched one after the other until enough topics were
        gathered or a page comes back empty.
        """
        topics = []
        # The original looped on `while count:`, which never stops on its
        # own once a page overshoots and `count` becomes negative.
        while len(topics) < count:
            ans = self.__loadpage(start, auth)
            if not ans['topics']:
                break
            topics += ans['topics']
            start += len(ans['topics'])

        return topics[:count]

    def get_latest_topics(self, since, auth = DefaultAuth):
        """Return the topics listed before the first one not newer than
        `since` (a `datetime`), in page order.

        NOTE(review): this assumes the pages list topics from the most
        recently updated to the oldest -- confirm for pinned topics.
        """
        topics = []
        start = 0
        while True:
            ans = self.__loadpage(start, auth)
            if not ans['topics']:
                break

            done = False
            for topic in ans['topics']:
                if topic.updated <= since:
                    done = True
                    break
                topics.append(topic)

            if done:
                break
            start += len(ans['topics'])

        return topics

    def __repr__(self):
        rep = '<Silicium Forum {}'.format(self.id)
        if self.updated:
            rep += ' updated on {}'.format(self.updated.isoformat())
            if self.updater:
                rep += ' by {}'.format(self.updater.name)
        else:
            rep += ' never updated'
        if self.title:
            rep += ' entitled "{}"'.format(self.title)
        rep += '>'
        return rep

# End of file.
# ---- Silicium/Topic.py (new file, mode 100755) ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Topic object of the Silicium forum scraper."""

from .utils import *


class Topic:
    """A topic of the forum, identified by its id.

    All the other attributes stay None until `load()` is called.
    """

    def __init__(self, topic_id, base = sili_base):
        self.id = topic_id
        self.__base = base
        self.title = None
        self.poster = None
        self.posted = None
        self.updater = None
        self.updated = None

    def load(self, title, poster, posted, updater, updated):
        """Store the data scraped from a topic list row."""
        self.title = title
        self.poster = poster
        self.posted = posted
        self.updater = updater
        self.updated = updated

    def __repr__(self):
        rep = '<Silicium Topic {}'.format(self.id)
        if self.posted:
            rep += ' posted on {}'.format(self.posted.isoformat())
            if self.poster:
                rep += ' by {}'.format(self.poster.name)
        if self.updated:
            rep += ' updated on {}'.format(self.updated.isoformat())
            if self.updater:
                rep += ' by {}'.format(self.updater.name)
        if self.title:
            rep += ' entitled "{}"'.format(self.title)
        rep += '>'
        return rep

# End of file.

# ---- Silicium/User.py (new file, mode 100755) ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Accounts and session cookies of the Silicium forum scraper."""

import requests as _requests

from .utils import *


def _cookie_prefix(cookies):
    """Return the `phpbb3_<random string>_` prefix of the session cookies.

    Raises `FailedAuthError` when no `phpbb3_..._u` cookie is present.
    """
    for name in cookies.keys():
        if name.startswith("phpbb3_") and name.endswith("_u"):
            return name[:-1]
    raise FailedAuthError


def _cookie(cookies, name):
    """Return the cookie `name`, or raise `FailedAuthError` if missing."""
    try:
        return cookies[name]
    except KeyError as err:
        raise FailedAuthError from err


class Auth:
    """ Authentication object.

    Holds the three session cookies (`<prefix>u`, `<prefix>k` and
    `<prefix>sid`). They are set by `get()` or `use()`; `save()` and
    `cookies()` cannot be used before one of those was called.
    """

    def __init__(self, user, base = sili_base):
        self.user = user
        self.__base = base

    def use(self, pref, u, k, sid):
        """Reuse session values previously returned by `save()`."""
        self.__pref = pref
        self.__u = u
        self.__k = k
        self.__sid = sid

    def save(self):
        """Return `(prefix, u, k, sid)`, suitable for `use()`."""
        return (self.__pref, self.__u, self.__k, self.__sid)

    def get(self, password):
        """Open a session on the forum and keep its cookies.

        A visitor session is opened first. For the user of id 1 nothing
        else is done; for any other user the login form is then posted
        with `self.user.name` and `password`.

        Raises `FailedAuthError` when the expected cookies are not
        received.
        """
        login_url = self.__base + '/ucp.php?mode=login'

        # Get the CSRF token.
        r = _requests.get(login_url)

        # Get the prefix, `phpbb3_<random string>_u/k/sid`.
        pref = _cookie_prefix(r.cookies)

        # Save the visitor things.
        self.__pref = pref
        self.__u = _cookie(r.cookies, pref + 'u')
        self.__k = ''
        self.__sid = _cookie(r.cookies, pref + 'sid')
        if self.user.id == 1:
            return

        # Also sets `sid`. CSRF token?
        params = {'username': self.user.name, 'password': password,
            'autologin': 'on', 'redirect': 'index.php',
            pref + 'sid': self.__sid, 'login': 'Connexion'}
        # Send back the visitor cookies received above (the original
        # referenced the undefined names `u` and `sid` here).
        cookies = {pref + 'u': self.__u, pref + 'k': '',
            pref + 'sid': self.__sid}

        r = _requests.post(login_url, params, cookies=cookies,
            allow_redirects=False)

        # Get the prefix, `phpbb3_<random string>_u/k/sid`.
        # NOTE(review): a refused login presumably sets no such cookie;
        # confirm that the forum cannot answer with visitor cookies again.
        pref = _cookie_prefix(r.cookies)

        # Get the things.
        self.__pref = pref
        self.__u = _cookie(r.cookies, pref + 'u')
        self.__k = _cookie(r.cookies, pref + 'k')
        self.__sid = _cookie(r.cookies, pref + 'sid')

    def cookies(self):
        """Return the session cookies as a dict for `requests`."""
        return {
            self.__pref + 'u': self.__u,
            self.__pref + 'k': self.__k,
            self.__pref + 'sid': self.__sid}


class User:
    """ Account object. Manages an account. """

    def __init__(self, user_id = 1, base = sili_base):
        self.__base = base
        self.id = user_id
        self.name = None

    def login(self, password = '', name = None):
        """Open a session for this account and return its `Auth`.

        `name`, when given, replaces the name of the account first.
        """
        if name is not None:
            self.name = name
        auth = Auth(self, base = self.__base)
        auth.get(password)
        return auth

    def __repr__(self):
        rep = '<Silicium User {}'.format(self.id)
        if self.name:
            rep += ': "{}"'.format(self.name)
        rep += '>'
        return rep


# Default account (id 1) and its session. `login()` sends a request to
# the forum, so importing this module needs network access.
DefaultUser = User()
DefaultAuth = DefaultUser.login()

# End of file.
# ---- Silicium/__init__.py (new file, mode 100755) ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from .User import *
from .Forum import *
from .Topic import *

# End of file.

# ---- Silicium/utils.py (new file, mode 100755) ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Constants and helpers shared by the Silicium modules."""

import datetime as _datetime

from .Exceptions import *

sili_base = 'http://silicium.org/forum'

# Abbreviated month names as they appear in the dates of the forum,
# January first.
_MONTHS = ['janv.', 'févr.', 'mars', 'avr.', 'mai', 'juin',
    'juil.', 'août', 'sept.', 'oct.', 'nov.', 'déc.']


def decode_date(s):
    """ Decode a date of the form `<day> <month> <year> <hour>:<minute>`.

    `<month>` is one of the abbreviated French month names. Anything
    after the fourth whitespace-separated field is ignored. Returns a
    naive `datetime` with the seconds set to 0; raises `ValueError` for
    an unknown month name.
    """

    # Get the tab.
    tab = s.split()

    # Get the data.
    day = int(tab[0])
    month = _MONTHS.index(tab[1]) + 1
    year = int(tab[2])
    hour, minute = map(int, tab[3].split(':'))

    return _datetime.datetime(year, month, day, hour, minute, 0)

# End of file.

# ---- SiliciumCache/__init__.py (new file, mode 100755) ----
#!/usr/bin/env python3
""" Cache management for the Silicium Bot.
    This is the main object when interacting with the site's data.
"""

import pickle
import datetime
import Silicium


class CacheManager:
    """Remembers, per forum id, the last update seen and the topic ids
    already reported, in a pickle file.

    `forums` maps a forum id to `{'updated': datetime or None,
    'topics': [topic ids]}`.
    """

    def __init__(self, path):
        self.__path = path
        # NOTE: unpickling can run arbitrary code; only use a cache file
        # that is trusted.
        try:
            with open(self.__path, 'rb') as fp:
                self.forums = pickle.load(fp)
        except FileNotFoundError:
            # First run: start with an empty cache.
            self.forums = {}
        except Exception as exc:
            # Still best-effort as before, but no longer silent, and
            # KeyboardInterrupt/SystemExit are not swallowed anymore.
            print("[c] Ignoring unreadable cache '{}': {!r}".format(
                self.__path, exc))
            self.forums = {}

    def __refresh_forum(self, forum):
        """Return the topics of `forum` and of its subforums that were
        not reported yet, and record them in `self.forums`.

        A forum that cannot be read with the current permissions gives
        an empty list.
        """
        topics = []

        # The exception is re-exported by the `Silicium` package; the
        # bare name used originally is not defined in this module.
        try:
            title = forum.get_title()
        except Silicium.NotEnoughPermissionsError:
            return []

        print("[f] Gathering from forum {}: '{}'".format(forum.id, title))

        # Refresh subforums.
        for subforum in forum.get_forums():
            if subforum.id in self.forums:
                u0 = self.forums[subforum.id]['updated']
                u1 = subforum.updated
                # Skip a known subforum that shows no last update, or
                # none newer than the one cached.
                if not u1 or (u0 and u0 >= u1):
                    continue
            topics.extend(self.__refresh_forum(subforum))

        # Check if the entry exists, create it otherwise.
        if forum.id not in self.forums:
            self.forums[forum.id] = {
                'updated': None,
                'topics': []
            }
        entry = self.forums[forum.id]

        # Check all of the topics.
        since = entry['updated']
        if not since:
            since = datetime.datetime(1970, 1, 1, 0, 0)
        for topic in forum.get_latest_topics(since):
            if topic.id not in entry['topics']:
                topics.append(topic)
                entry['topics'].append(topic.id)

        # Update.
        entry['updated'] = forum.updated
        return topics

    def refresh(self):
        """Scan the whole forum from the index page, save the cache and
        return the list of new topics."""
        topics = self.__refresh_forum(Silicium.Forum(0))
        with open(self.__path, 'wb') as fp:
            pickle.dump(self.forums, fp)
        return topics

# End of file.

# ---- main.py (new file, 23 lines; its diff header is missing from the
# page, only the hunk header `@@ -0,0 +1,23 @@` is left) ----
#!/usr/bin/env python3
""" The main file for the Silicium Bot.
    Refer to `README.md` if you have any questions on the project.
"""

import os
import argparse
import SiliciumCache

if __name__ == "__main__":
    # By default the cache lives next to this script.
    cache_path = os.path.normpath(os.path.join(os.path.dirname(__file__),
        'cache.p'))

    # Parse the arguments.
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--cache', dest='cache',
        help='Cache path.', default=cache_path)
    args = argparser.parse_args()

    # Make the cache manager, refresh.
    cache = SiliciumCache.CacheManager(args.cache)
    print(cache.refresh())

# End of file.
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a9e5e1a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +requests +bs4 +mastodon.py diff --git a/scripts/silicium-bot.service b/scripts/silicium-bot.service new file mode 100644 index 0000000..5501292 --- /dev/null +++ b/scripts/silicium-bot.service @@ -0,0 +1,14 @@ +[Unit] +Description=silicium-bot +After=network.target + +[Service] +Type=simple +User=sili +WorkingDirectory=/home/sili/bot +ExecStart=/home/sili/bot/main.py +TimeoutSec=15 +Restart=always + +[Install] +WantedBy=multi-user.target |