aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--README.md22
-rwxr-xr-xSilicium/Exceptions.py13
-rwxr-xr-xSilicium/Forum.py248
-rwxr-xr-xSilicium/Topic.py38
-rwxr-xr-xSilicium/User.py99
-rwxr-xr-xSilicium/__init__.py8
-rwxr-xr-xSilicium/utils.py25
-rwxr-xr-xSiliciumCache/__init__.py57
-rwxr-xr-xmain.py23
-rw-r--r--requirements.txt3
-rw-r--r--scripts/silicium-bot.service14
12 files changed, 552 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c32f112
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+/config.p
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2ee96da
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# Silicium Bot for Mastodon
+This is the source code of the Silicium Bot for Mastodon. You can find the
+"official" account for this bot at [silicium@oldbytes.space][silly].
+
+The objective of this bot is to find new topics on the
+[Silicium.org forum][foufouleforum], and post a reference to them on the
+Mastodon account (something an RSS reader should be able to do).
+It should have a low footprint on the
+[Silicium.org forum server][foufouleforum]: it uses a cache, and only
+re-reads a category when that category, or one in the chain from it to
+the homepage, announces something new.
+
+The [Silicium.org forum][foufouleforum] doesn't have any API for machines,
+only humans (using HTML), so this bot scrapes the content. Also, most of
+the content is only accessible while connected, so you'll need a
+[Silicium.org forum][foufouleforum] account to run this bot.
+
+This bot is run as a service (not a cron job). If you're using **systemd**,
+see the `scripts/silicium-bot.service` unit file.
+
+[silly]: https://oldbytes.space/@silicium
+[foufouleforum]: http://www.silicium.org/forum/index.php
diff --git a/Silicium/Exceptions.py b/Silicium/Exceptions.py
new file mode 100755
index 0000000..2864f88
--- /dev/null
+++ b/Silicium/Exceptions.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+class FailedAuthError(Exception):
+    def __init__(self):  # takes no arguments: the message is fixed
+        super().__init__("Failed auth.")
+
+class NotEnoughPermissionsError(Exception):
+    def __init__(self):
+        # The message is fixed, so the class is raised without arguments.
+        super().__init__("Not enough permissions!")
+
+# End of file.
diff --git a/Silicium/Forum.py b/Silicium/Forum.py
new file mode 100755
index 0000000..c422036
--- /dev/null
+++ b/Silicium/Forum.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import requests as _requests
+import urllib as _urllib
+import datetime as _datetime
+import collections as _collections
+
+from bs4 import BeautifulSoup as _BeautifulSoup
+from bs4.element import NavigableString as _NavigableString
+
+from .User import *
+from .Topic import *
+from .utils import *
+
+class Forum:
+    def __init__(self, forum_id, base = sili_base):
+        # `forum_id` 0 stands for the board index; `base` is the forum root.
+        self.id, self.__base = forum_id, base
+        # Filled by load(), or by the first page fetch.
+        self.title = None
+        self.forums = self.announcements = None
+        self.updated = None
+        self.updater = None
+
+    def load(self, title, updater, updated):
+        """ Store the listing data scraped from a parent page. """
+        self.title = title
+        self.updater, self.updated = updater, updated
+
+ def __loadlist(self, element):
+ """ Yield one dict per `li.row` of the topic/forum list in `element`. """
+
+ ul = element.find(True, {'class': ['topiclist topics',
+ 'topiclist forums']}, recursive=True)
+
+ for li in ul.find_all('li', {'class': 'row'}, recursive=False):
+ ans = {}
+
+ # Get the title from the row's topic or forum link.
+ title = li.find('a', {'class': ['topictitle', 'forumtitle']},
+ recursive=True)
+ ans['title'] = title.text
+
+ # Read the IDs from the `t` and `f` query arguments of that link.
+ url = _urllib.parse.urlparse(title['href'])
+ arg = _urllib.parse.parse_qs(url.query)
+ if 't' in arg:
+ ans['topic_id'] = int(arg['t'][0])
+ if 'f' in arg:
+ ans['forum_id'] = int(arg['f'][0])
+
+ if 'topics' in ul['class']:
+ # Topic lists only: get the poster ID (-1 if not a link) and date.
+ tab = list(li.find('div', {'class': 'topic-poster'}, \
+ recursive=True).children)
+ if tab[1].name == 'a':
+ url = _urllib.parse.urlparse(tab[1]['href'])
+ arg = _urllib.parse.parse_qs(url.query)
+ uid = int(arg['u'][0])
+ else:
+ uid = -1
+ poster = User(uid, base = self.__base)
+ poster.name = tab[1].text
+ # Drop trailing children until a text node holding '»' is last.
+ while type(tab[-1]) != _NavigableString \
+ or tab[-1].find('»') < 0:
+ tab = tab[:-1]
+ p_date = decode_date(tab[-1].split('»')[1])
+ ans['poster'] = poster
+ ans['posted'] = p_date
+
+ # Get the updater ID and date; both are None with under 6 children.
+ tab = list(li.find(True, {'class': 'lastpost'}, \
+ recursive=True).find('span').children)
+ if len(tab) < 6:
+ ans['updater'] = None
+ ans['updated'] = None
+ else:
+ if tab[-6].name == 'a':
+ url = _urllib.parse.urlparse(tab[-6]['href'])
+ arg = _urllib.parse.parse_qs(url.query)
+ uid = int(arg['u'][0])
+ else:
+ uid = -1
+ updater = User(uid, base = self.__base)
+ updater.name = tab[-6].text
+ u_date = decode_date(tab[-1].strip())
+ ans['updater'] = updater
+ ans['updated'] = u_date
+
+ yield ans
+
+    def __loadpage(self, start=0, auth = DefaultAuth):
+        """ Scrape one listing page into a dict with the keys 'title',
+            'forums', 'announcements' and 'topics' (from offset `start`). """
+        print("[p] Loading entries starting from {}".format(start))
+
+        url = self.__base
+        if self.id == 0:
+            url += '/index.php'
+        else:
+            url += '/viewforum.php?f={}&start={}'.format(self.id, start)
+
+        text = _requests.get(url, cookies=auth.cookies()).text
+        tree = _BeautifulSoup(text, "html5lib")
+        body = tree.body.find(id='page-body', recursive=True)
+
+        # Check if authentication is required.
+        if body.find('strong') \
+        or body.find('form', {'id': 'login'}, recursive=True):
+            raise NotEnoughPermissionsError
+
+        # Prepare the answer, find the name.
+        ans = {'topics': []}
+        if self.id == 0:
+            ans['title'] = "My Silicium"
+        else:
+            ans['title'] = next(tree.body.find(True, {'class': 'forum-title'},
+                recursive=True).children).text
+
+        # Find the forums.
+        forums = []
+        for raw in body.find_all(True, {'class': 'forabg'}):
+            for el in self.__loadlist(raw):
+                forum = Forum(el['forum_id'], base = self.__base)
+                forum.load(el['title'], el['updater'], el['updated'])
+                forums.append(forum)
+        ans['forums'] = forums
+
+        # Find the announcements (only the first announcement block is read).
+        announcements = []
+        for raw in body.find_all(True, {'class': 'forumbg announcement'}):
+            for el in self.__loadlist(raw):
+                topic = Topic(el['topic_id'], base = self.__base)
+                topic.load(el['title'], el['poster'], el['posted'],
+                    el['updater'], el['updated'])
+                announcements.append(topic)
+            break
+        ans['announcements'] = announcements
+
+        # Find the start offset of the last page from the pagination buttons.
+        buttons = body.find('div', {'class': 'pagination'}, recursive=True)
+        if buttons and buttons.find('ul'):
+            buttons = buttons.find('ul').find_all('li')
+            buttons = _collections.deque(buttons, 2)
+            if len(buttons) == 1 or 'arrow' in buttons[1]['class']:
+                button = buttons[0]
+            else:
+                button = buttons[1]
+            if button.find('a'):
+                button = button.find('a')
+                url = _urllib.parse.urlparse(button['href'])
+                arg = _urllib.parse.parse_qs(url.query)
+                lastpage = int(arg["start"][0]) if "start" in arg else 0
+            else:
+                lastpage = int(button.find('span').text) * 50 - 50
+        else:
+            lastpage = 0
+
+        # PHPBB3 serves the last page when `start` is past it instead of
+        # failing, so out-of-range offsets are detected by hand: the last
+        # page is re-read at its real offset, then sliced.
+        if start >= lastpage + 50:
+            return ans
+        if start > lastpage:
+            sub_ans = self.__loadpage(lastpage, auth)
+            if start >= lastpage + len(sub_ans['topics']):
+                return ans
+            ans['topics'] = sub_ans['topics'][start - lastpage:]
+            return ans
+
+        # Find the topics (first block that is not an announcement block).
+        topics = []
+        for raw in body.find_all(True, {'class': 'forumbg'}):
+            if 'announcement' in raw['class']: continue
+            for el in self.__loadlist(raw):
+                topic = Topic(el['topic_id'], base = self.__base)
+                topic.load(el['title'], el['poster'], el['posted'],
+                    el['updater'], el['updated'])
+                topics.append(topic)
+            break
+        ans['topics'] = topics
+
+        return ans
+
+    def __getmain(self, auth = DefaultAuth):
+        """ Scrape the first page once to fill title, forums, announcements. """
+        if self.forums is not None:
+            return
+        ans = self.__loadpage(auth = auth)
+        self.title = ans['title']
+        self.forums = ans['forums']
+        self.announcements = ans['announcements']
+
+ def get_title(self, auth = DefaultAuth): # loads the page unless already done
+ self.__getmain(auth)
+ return self.title
+
+ def get_forums(self, auth = DefaultAuth): # loads the page unless already done
+ self.__getmain(auth)
+ return self.forums
+
+    def get_topics(self, start=0, count=50, auth = DefaultAuth):
+        """ Return at most `count` topics, starting at offset `start`. """
+        topics = []
+        while len(topics) < count:
+            page = self.__loadpage(start + len(topics), auth)['topics']
+            if not page: break
+            # A page may hold more than is still wanted: keep the remainder.
+            topics += page[:count - len(topics)]
+
+        return topics
+
+    def get_latest_topics(self, since, auth = DefaultAuth):
+        """ Return the topics updated after `since`, in page order.
+
+            Pages are read from offset 0 until a topic whose update date is
+            not newer than `since` is met, or until a page comes back empty.
+        """
+        fresh = []
+        offset = 0
+        while True:
+            page = self.__loadpage(offset, auth)['topics']
+            if not page: break
+
+            for topic in page:
+                if topic.updated <= since:
+                    return fresh
+                fresh.append(topic)
+
+            offset += len(page)
+        return fresh
+
+    def __repr__(self):
+        # <Silicium Forum ID (updated on DATE [by NAME] | never updated) [title]>
+        parts = ['<Silicium Forum {}'.format(self.id)]
+        if not self.updated:
+            parts.append(' never updated')
+        else:
+            parts.append(' updated on {}'.format(self.updated.isoformat()))
+            if self.updater:
+                parts.append(' by {}'.format(self.updater.name))
+        if self.title:
+            parts.append(' entitled "{}"'.format(self.title))
+        return ''.join(parts) + '>'
+
+# End of file.
diff --git a/Silicium/Topic.py b/Silicium/Topic.py
new file mode 100755
index 0000000..3c67c1e
--- /dev/null
+++ b/Silicium/Topic.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from .utils import *
+
+class Topic: # A forum topic; every field but `id` stays None until load().
+ def __init__(self, topic_id, base = sili_base):
+ self.id = topic_id
+ self.__base = base
+ self.title = None
+ self.poster = None
+ self.posted = None
+ self.updater = None
+ self.updated = None
+
+ def load(self, title, poster, posted, updater, updated): # plain setters
+ self.title = title
+ self.poster = poster
+ self.posted = posted
+ self.updater = updater
+ self.updated = updated
+
+ def __repr__(self): # only the fields that are set appear in the result
+ rep = '<Silicium Topic {}'.format(self.id)
+ if self.posted:
+ rep += ' posted on {}'.format(self.posted.isoformat())
+ if self.poster:
+ rep += ' by {}'.format(self.poster.name)
+ if self.updated:
+ rep += ' updated on {}'.format(self.updated.isoformat())
+ if self.updater:
+ rep += ' by {}'.format(self.updater.name)
+ if self.title:
+ rep += ' entitled "{}"'.format(self.title)
+ rep += '>'
+ return rep
+
+# End of file.
diff --git a/Silicium/User.py b/Silicium/User.py
new file mode 100755
index 0000000..89f290c
--- /dev/null
+++ b/Silicium/User.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import requests as _requests
+
+from .utils import *
+
+class Auth:
+    """ Authentication object: holds the cookie prefix and the `u`, `k` and
+        `sid` cookie values of one PHPBB3 session. """
+
+    def __init__(self, user, base = sili_base):
+        self.user = user
+        self.__base = base
+
+    def use(self, pref, u, k, sid):
+        """ Restore the cookie prefix and values returned by save(). """
+        self.__pref, self.__u, self.__k, self.__sid = pref, u, k, sid
+
+    def save(self):
+        """ Return the cookie prefix and values as a tuple for use(). """
+        return (self.__pref, self.__u, self.__k, self.__sid)
+
+    @staticmethod
+    def __cookie_prefix(cookies):
+        """ Return the `phpbb3_<random string>_` prefix of the cookie names.
+            Raises FailedAuthError when no `phpbb3_..._u` cookie is present. """
+        for c in cookies.keys():
+            if c.startswith("phpbb3_") and c.endswith("_u"):
+                return c[:-1]
+        raise FailedAuthError
+
+    def get(self, password):
+        """ Open a session: a visitor one when the user ID is 1, otherwise
+            by posting the login form with the user's name and `password`.
+            Raises FailedAuthError if a response lacks the `..._u` cookie. """
+
+        # Get the visitor cookies; their session ID goes into the form.
+        r = _requests.get(self.__base + '/ucp.php?mode=login')
+        pref = self.__cookie_prefix(r.cookies)
+
+        # Save the visitor things.
+        self.__pref = pref
+        self.__u = r.cookies[pref + 'u']
+        self.__k = ''
+        self.__sid = r.cookies[pref + 'sid']
+        if self.user.id == 1:
+            return
+
+        # Also sets `sid`. CSRF token?
+        params = {'username': self.user.name, 'password': password,
+            'autologin': 'on', 'redirect': 'index.php',
+            pref + 'sid': self.__sid, 'login': 'Connexion'}
+        # The login form is posted along with the visitor cookies.
+        cookies = {pref + 'u': self.__u, pref + 'k': '',
+            pref + 'sid': self.__sid}
+
+        r = _requests.post(self.__base + '/ucp.php?mode=login',
+            params, cookies=cookies, allow_redirects=False)
+        pref = self.__cookie_prefix(r.cookies)
+
+        # Get the things.
+        self.__pref = pref
+        self.__u = r.cookies[pref + "u"]
+        self.__k = r.cookies[pref + "k"]
+        self.__sid = r.cookies[pref + "sid"]
+
+    def cookies(self):
+        """ Return the session cookies as a dict usable with `requests`. """
+        return {
+            self.__pref + 'u': self.__u,
+            self.__pref + 'k': self.__k,
+            self.__pref + 'sid': self.__sid}
+
+class User:
+    """ Account object. Manages an account. """
+
+    def __init__(self, user_id = 1, base = sili_base):
+        self.__base = base
+        self.id = user_id
+        self.name = None
+
+    def login(self, password = '', name = None):
+        """ Return an Auth holding a fresh session for this user. """
+        if name is not None: self.name = name
+        auth = Auth(self, base = self.__base)
+        auth.get(password)
+        return auth
+
+    def __repr__(self):
+        rep = '<Silicium User {}'.format(self.id)
+        if self.name: rep += ': "{}"'.format(self.name)
+        rep += '>'
+        return rep
+
+DefaultUser = User()  # user ID 1, which Auth.get() treats as the visitor
+DefaultAuth = DefaultUser.login()  # NOTE: sends an HTTP request at import time
+
+# End of file.
diff --git a/Silicium/__init__.py b/Silicium/__init__.py
new file mode 100755
index 0000000..5d6b86f
--- /dev/null
+++ b/Silicium/__init__.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from .User import *
+from .Forum import *
+from .Topic import *
+
+# End of file.
diff --git a/Silicium/utils.py b/Silicium/utils.py
new file mode 100755
index 0000000..10cbbdd
--- /dev/null
+++ b/Silicium/utils.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import datetime as _datetime
+
+from .Exceptions import *
+
+sili_base = 'http://silicium.org/forum'  # default forum root, no trailing '/'
+
+def decode_date(s):
+    """ Decode a date written as '<day> <French month abbr.> <year> <H:M>'. """
+
+    months = ('janv.', 'févr.', 'mars', 'avr.', 'mai', 'juin',
+        'juil.', 'août', 'sept.', 'oct.', 'nov.', 'déc.')
+    # Whitespace-separated fields; anything after the fourth is ignored.
+    fields = s.split()
+
+    # Convert in the order day, month, year, time.
+    day = int(fields[0])
+    month = months.index(fields[1]) + 1
+    year = int(fields[2])
+    hour, minute = (int(part) for part in fields[3].split(':'))
+    return _datetime.datetime(year, month, day, hour, minute, 0)
+
+# End of file.
diff --git a/SiliciumCache/__init__.py b/SiliciumCache/__init__.py
new file mode 100755
index 0000000..33bd90a
--- /dev/null
+++ b/SiliciumCache/__init__.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+""" Cache management for the Silicium Bot.
+ This is the main object when interacting with the site's data.
+"""
+
+import pickle, datetime
+import Silicium
+
+class CacheManager:
+    """ Remembers, per forum ID, the last update date and the topic IDs
+        already seen; the dict is pickled to `path` after each refresh. """
+    def __init__(self, path):
+        self.__path = path
+        # NOTE: unpickling runs code; `path` must be a file this bot wrote.
+        try:
+            with open(self.__path, 'rb') as f: self.forums = pickle.load(f)
+        except Exception: self.forums = {}
+
+    def __refresh_forum(self, forum):
+        """ Return the unseen topics of `forum` and its updated subforums. """
+        topics = []
+        try: title = forum.get_title()
+        except Silicium.NotEnoughPermissionsError: return []
+
+        print("[f] Gathering from forum {}: '{}'".format(forum.id, title))
+
+        # Refresh subforums, skipping known ones whose date has not moved.
+        for subforum in forum.get_forums():
+            if subforum.id in self.forums:
+                u0 = self.forums[subforum.id]['updated']
+                u1 = subforum.updated
+                if not u1 or (u0 and u0 >= u1):
+                    continue
+            topics.extend(self.__refresh_forum(subforum))
+
+        # Get the entry of this forum, creating it on the first visit.
+        entry = self.forums.setdefault(forum.id,
+            {'updated': None, 'topics': []})
+
+        # Check all of the topics.
+        since = entry['updated'] or datetime.datetime(1970, 1, 1, 0, 0)
+        for topic in forum.get_latest_topics(since):
+            if topic.id not in entry['topics']:
+                topics.append(topic)
+                entry['topics'].append(topic.id)
+
+        # Update.
+        entry['updated'] = forum.updated
+        return topics
+
+    def refresh(self):
+        """ Refresh from the board index, save the cache, return new topics. """
+        topics = self.__refresh_forum(Silicium.Forum(0))
+        with open(self.__path, 'wb') as f: pickle.dump(self.forums, f)
+        return topics
+
+# End of file.
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..6f15af7
--- /dev/null
+++ b/main.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+""" The main file for the Silicium Bot.
+ Refer to `README.md` if you have any questions on the project.
+"""
+
+import os, argparse
+import SiliciumCache
+
+if __name__ == "__main__":
+    # By default the cache lives next to this script.
+    here = os.path.dirname(__file__)
+    cache_path = os.path.normpath(os.path.join(here, 'cache.p'))
+    # Parse the arguments.
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument('--cache', dest='cache',
+        help='Cache path.', default=cache_path)
+    args = argparser.parse_args()
+
+    # Make the cache manager, refresh, and print the new topics found.
+    manager = SiliciumCache.CacheManager(args.cache)
+    print(manager.refresh())
+
+# End of file.
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a9e5e1a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+requests
+bs4
+mastodon.py
diff --git a/scripts/silicium-bot.service b/scripts/silicium-bot.service
new file mode 100644
index 0000000..5501292
--- /dev/null
+++ b/scripts/silicium-bot.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=silicium-bot
+After=network.target
+
+[Service]
+Type=simple
+User=sili
+WorkingDirectory=/home/sili/bot
+ExecStart=/home/sili/bot/main.py
+TimeoutSec=15
+Restart=always
+
+[Install]
+WantedBy=multi-user.target