aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas "Cakeisalie5" Touhey <thomas@touhey.fr>2018-10-07 01:03:57 +0200
committerThomas "Cakeisalie5" Touhey <thomas@touhey.fr>2018-10-07 01:03:57 +0200
commit45365f0d5eeccdeb4e1b15d38149a5f75ad655c9 (patch)
tree4e5d08455b8d84a136b6f159454da9b4615e70a0
parentaacdcf1e860c9afc687b868dc7417307e75efc8a (diff)
Wrote the README, working on the interface.
-rw-r--r--README.md1
-rw-r--r--README.rst85
-rwxr-xr-xsgdfi/__main__.py10
-rw-r--r--sgdfi/_dbs/_event.py75
-rwxr-xr-xsgdfi/_manager.py143
-rwxr-xr-xsgdfi/_repr.py20
-rwxr-xr-xsgdfi/_session.py41
7 files changed, 368 insertions, 7 deletions
diff --git a/README.md b/README.md
deleted file mode 100644
index 98ff6d5..0000000
--- a/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# SGDFi : interagir avec l'intranet SGDF
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..8a12687
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,85 @@
+SGDFi : interact with SGDF's (Scouts et Guides de France) intranet
+==================================================================
+
+`Scouts et Guides de France <SGDF_>`_ is a Scouting and Guiding organization
+based in France, member of the `Scoutisme Français`_. For managing its
+assets and operations, it uses an intranet based on `Intr@ssoc`_ and
+managed by `Supralog`_, which is a web application with no easy
+machine access.
+
+SGDFi is a Python 3.x module which aims at providing machine access to this web
+application, allowing read and write access to what is managed by the intranet
+through a given adherent's account by imitating a web browser (web scraping,
+form completing, ajax simulating, and so on).
+
+Using SGDFi
+-----------
+
+SGDFi separates four aspects of interacting with the intranet, each one
+depending on the previous ones:
+
+- **representation.** SGDFi provides objects and enumerations to represent
+ intranet objects, with value validation.
+- **storing.** SGDFi provides a database which is able to manage the knowledge
+ we have of the intranet by adding, completing and searching through
+ objects.
+- **decoding.** SGDFi provides a manager which decodes various files (HTML
+ pages and fragments, AJAX responses, XLS, XML and JSON documents, …),
+ extracts valuable information from them and feeds a database to link
+ the just extracted information with previously extracted information (e.g.
+ structure hierarchy and structure information).
+- **interacting.** SGDFi provides a session which sends the commands and
+ gathers the raw documents, which are then given to a manager.
+
+The interface itself is still under heavy development.
+
+More on the intranet
+--------------------
+
+The `Intr@ssoc`_ base is owned and managed by `Supralog`_, and adapted for
+each one of their clients. SGDF is not the only French scouting organization
+to use it: the `Éclaireuses et Éclaireurs de France <EEDF_>`_ use it as well.
+
+`SGDF`_'s intranet is available at `intranet.sgdf.fr`_, although a test one
+is also available at `intranet-qualification.sgdf.fr`_. It is programmed in
+``ASP.NET``, and runs on Microsoft IIS (which is tipped off by the ``Server``
+and ``X-Powered-By`` HTTP headers, and the ``.aspx`` extensions visible in
+the URLs).
+
+Information is gathered several ways:
+
+- through a ``GET`` request on the page.
+- through a ``POST`` request on the page, using fields gathered from the
+ ``GET`` version (including a CSRF token).
+- through gathering a `Microsoft Ajax`_ page fragment.
+
+For more information, I'm writing a wiki on the side, which might become
+available at some point.
+
+What is left to do
+------------------
+
+There are plenty of things to do:
+
+- use MIME types instead of random types for ``Manager.feed()``, and check
+ what we can send to ``Manager.feed()`` instead of text to also manage
+ binary file formats such as XLS.
+- translate everything from the SDY project.
+- isolate the database?
+- manage tips on adherents through operations, e.g. only some purple shirts
+ can edit an adherent, or only the treasurer can declare that the adherent
+ has paid their contribution.
+- make manager and session independent so that managers can manage more than
+ one session at a time?
+- make manager public? database? representations?
+- document the interface when stable?
+
+.. _SGDF: https://www.sgdf.fr/
+.. _EEDF: https://www.eedf.fr/
+.. _Scoutisme Français: https://www.scoutisme-francais.fr/
+.. _Intr@ssoc: http://www.intrassoc.com/
+.. _Supralog: http://www.supralog.com
+.. _intranet.sgdf.fr: https://intranet.sgdf.fr/
+.. _intranet-qualification.sgdf.fr: http://intranet-qualification.sgdf.fr/
+
+.. _Microsoft Ajax: https://msdn.microsoft.com/fr-fr/library/ee341002(v=vs.100).aspx
diff --git a/sgdfi/__main__.py b/sgdfi/__main__.py
index d729e77..a7cfe80 100755
--- a/sgdfi/__main__.py
+++ b/sgdfi/__main__.py
@@ -22,9 +22,10 @@ def test_session():
s = get_session()
- ret = s.get_ops('4', "yCbyTmNDHpp8CotDhWoEkQ==")
+ #ret = s.get_ops('4', "yCbyTmNDHpp8CotDhWoEkQ==")
#ret = s.get_place('ONCt5ZDoIUZIXlYv9QyGuw==')
#ret = s.get_structure('oA0pOxhIxVX5eh6fTNvgzg==')
+ #ret = s.get_events(2018, 10)
if type(ret) == list:
for e in ret:
@@ -71,7 +72,8 @@ def test_load():
from ._manager import Manager
man = Manager()
- ret = man.load_dump("2018100513245400-operations.html")
+ ret = man.load_dump("2018100620105800-operations.xls")
+ #ret = man.load_dump("2018100516460200-calendar_month.txt")
if type(ret) == list:
for e in ret:
@@ -90,9 +92,9 @@ def test_funcs():
if __name__ == '__main__':
#test_repr()
- test_session()
+ #test_session()
#test_save()
- #test_load()
+ test_load()
#test_funcs()
# End of file.
diff --git a/sgdfi/_dbs/_event.py b/sgdfi/_dbs/_event.py
new file mode 100644
index 0000000..74ae749
--- /dev/null
+++ b/sgdfi/_dbs/_event.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+#******************************************************************************
+# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
+# This file is part of the sgdfi project, which is MIT-licensed.
+#******************************************************************************
+""" Event type reference for SGDFi. """
+
+from enum import Enum as _Enum, unique as _unique
+
+__all__ = ["EventType", "EventTypeData"]
+
+@_unique
+class EventType(_Enum):
+ """ The default type (unknown). """
+ UNKNOWN = 0
+
+ """ Sortie journée. """
+ DAY = 1
+
+ """ Réunion demi-journée. """
+ HALF = 2
+
+ """ Week-End. """
+ WEEKEND = 3
+
+ """ Réunion de travail. """
+ WORK_MEETING = 4
+
+ """ Camp Année. """
+ YEAR_CAMP = 5
+
+ """ Camp Été. """
+ CAMP = 6
+
+ """ Extrajob. """
+ EXTRAJOB = 7
+
+# Each entry in `_EventTypeData` has the following fields:
+# - Full type name.
+# - Abbreviated type name for XLS exports.
+
+_EventTypeData = {
+ EventType.DAY = ("Sortie Journée", ""),
+ EventType.HALF = ("Réunion Demi-Journée", "DemJour"),
+ EventType.WEEKEND = ("Week-End", "WE"),
+ EventType.WORK_MEETING = ("Réunion de travail", "RéuTrav"),
+ EventType.YEAR_CAMP = ("Camp Année", ""),
+ EventType.CAMP = ("Camp Eté", ""),
+ EventType.EXTRAJOB = ("Extrajob", ""),
+}
+
+# Make the leads.
+
+_EventTypeLeads = {}
+_EventTypeLeads.update({full.strip().casefold(): i \
+ for i, (full, abbr) in _EventTypeData.items()})
+_EventTypeLeads.update({abbr.strip().casefold(): i \
+ for i, (full, abbr) in _EventTypeData.items()})
+
+# Define the data class.
+
+class EventTypeData:
+ """ The event type data.
+
+ Unfinished: the constructor only defines a validation helper
+ and does not store anything yet. """
+
+ def __init__(self, value):
+ # Tell whether `value` is a valid `EventType` value.
+ def isid(value):
+ try:
+ EventType(value)
+ except ValueError:
+ return False
+ return True
+
+ # TODO: resolve `value` and store the result (presumably using
+ # `isid()` and the `_EventTypeLeads` table -- to be confirmed).
+
+# End of file.
diff --git a/sgdfi/_manager.py b/sgdfi/_manager.py
index 8630030..1d23c5c 100755
--- a/sgdfi/_manager.py
+++ b/sgdfi/_manager.py
@@ -349,7 +349,7 @@ class Manager:
# décrits par le document `intranet.rst`.
resp = []
- raw = iter(text.split('|')[:-1])
+ raw = iter(content.split('|')[:-1])
for code in raw:
code = int(code)
name = next(raw)
@@ -402,6 +402,8 @@ class Manager:
elif type == 'xls':
if hint == 'people':
func = self.__feed_xls_people
+ elif hint == 'operations':
+ func = self.__feed_xls_operations
else:
raise ValueError(f"unknown xls hint: {repr(hint)}")
@@ -960,6 +962,145 @@ class Manager:
# TODO: feed.
return people
+ def __feed_html_calendar_month_fragment(self, content):
+ """ Decode the HTML page for a calendar month from a
+ BeautifulSoup decoded content and feed it into the
+ manager's structures. """
+
+ parent = content.find(id = 'ctl00_MainContent__calendrier')
+ events = []
+
+ for jour in parent.find_all(attrs = {'class': ['jourDuMois']}):
+ for div in jour.find_all('div'):
+ try:
+ e_id = div.find_all('a', recursive = False)[1]
+ except:
+ continue
+ e_id = _parse_qs(_urlparse(e_id['href']).query)['id'][0]
+
+ e = Event()
+
+ span = div.find('span')['onmouseover']
+ span = span[span.find("'") + 1:]
+ spanidx = next(_rfinditer(r"($|[^\\])(\\{2})*\'", span)).end()
+ span = span[:spanidx - 1]
+ span = _rreplace(r"($|[^\\])\\*'", r"\1'", span)
+ span = _htmlunescape(span)
+
+ span = span.replace('<br />', '\n')
+ span = span.replace('<hr />', '')
+ span = span.replace('<u>', '')
+ span = span.replace('</u>', '')
+ span = _rsplit(r'</?b>', span)
+
+ # Identifiant de l'intranet.
+
+ e.iid = e_id
+
+ # Libellé de l'évènement.
+
+ e.name = span[1].strip()
+
+ # Description de l'évènement.
+ # Au format multi-lignes.
+
+ desc = span[14].strip()
+ if desc != "Indéterminée":
+ e.desc = desc
+
+ # Type de l'évènement. TODO
+ # Parmi :
+ # - "Sortie Journée"
+ # - "Réunion Demi-Journée"
+ # - "Week-End"
+ # - "Réunion de travail"
+ # - "Camp Année"
+ # - "Camp Eté"
+ # - "Extrajob"
+
+ typ = span[4].strip()
+
+ # Date de début de l'évènement.
+ # Sous le format "JJ/MM/AAAA HH:MM".
+
+ def dateheure(x):
+ jma, hm = x.split()
+ j, M, a = map(int, jma.split('/'))
+ h, m = map(int, hm.split(':'))
+
+ return DateHeure(a, M, j, h, m)
+
+ du = span[6].strip()
+ e.start = dateheure(du)
+
+ # Date de fin de l'évènement.
+ # Sous le format "JJ/MM/AAAA HH:MM".
+
+ au = span[8].strip()
+ e.end = dateheure(au)
+
+ # Lieu de l'évènement.
+ # En une ligne. "Indéterminé" s'il n'y en a pas.
+
+ lieu = span[10].strip()
+ if lieu != "Indéterminé":
+ e.place.name = lieu
+
+ # Structure principale ayant proposé l'évènement. TODO
+ # Au format "CODE - NOM".
+
+ prop = span[12].strip()
+
+ # Dernier utilisateur ayant mis à jour l'évènement. TODO
+ # Nom seulement.
+
+ maj = span[16].strip()
+
+ # Fonctions concernées. TODO
+ # Valeurs possibles :
+ # - "Toutes".
+
+ fct = span[18].strip()
+
+ # Date de rappel de l'évènement. TODO
+ # Valeurs possibles :
+ # - "Aucune"
+ # - ?
+
+ dr = span[20].strip()
+
+ # Visibilité de l'évènement. TODO
+ # Valeurs possibles :
+ # - "Structure seule"
+ # - "Structures dépendantes"
+
+ vis = span[22].strip()
+
+ # Date de dernier envoi des invitations. TODO
+ # Valeurs possibles :
+ # - "Aucun"
+ # - ?
+
+ inv = span[24].strip()
+
+ # Ajout de l'évènement à la liste.
+
+ events.append(e)
+
+ return events
+
+ # ---
+ # XLS documents decoding.
+ # ---
+
+ def __feed_xls_operations(self, content):
+ """ Decode the XLS document for operations ("Journal_activites.xls")
+ from a pandas decoded content and feed it into the
+ manager's structures. """
+
+ print(content)
+ raise Exception
+
# ---
# JSON documents decoding.
# ---
diff --git a/sgdfi/_repr.py b/sgdfi/_repr.py
index 9ff0334..ac48b3f 100755
--- a/sgdfi/_repr.py
+++ b/sgdfi/_repr.py
@@ -20,7 +20,7 @@ from ._dbs import OperationType, OperationTypeData as _OperationTypeData, \
AllocationsRegime
__all__ = ["IID", "Title", "Structure", "Adherent", "RallyRegistration",
- "Camp", "Place", "Operation", "OperationType", "Function",
+ "Camp", "Place", "Operation", "OperationType", "Function", "Event",
"StructureType", "StructureStatus", "AllocationsRegime"]
# ---
@@ -341,6 +341,24 @@ class Place(_Base):
closest_bus_stop = _TextProperty()
closest_bus_stop_distance = _TextProperty()
+class Event(_Base):
+ """ An event of the calendar. """
+
+ iid = _IIDProperty()
+ # TODO: event type.
+ start = _DateProperty()
+ end = _DateProperty()
+ # TODO: main structure which proposed the event.
+ # TODO: last user who updated the event.
+ # TODO: visibility of the event.
+ # TODO: concerned functions.
+
+ name = _TextProperty()
+ description = _TextProperty()
+
+ # TODO: reminder date of the event.
+ # TODO: date of the last sending of the invitations.
+
+
class Operation(_Base):
""" An event on the website.. """
diff --git a/sgdfi/_session.py b/sgdfi/_session.py
index 3d83b38..5a53783 100755
--- a/sgdfi/_session.py
+++ b/sgdfi/_session.py
@@ -386,4 +386,45 @@ class Session(_Manager):
return st
+ def get_events(self, year = 2018, month = 6):
+ """ Get events in the calendar. """
+
+ # Check the arguments.
+
+ if not isinstance(year, int):
+ raise ValueError("expected an integer for the year")
+ if not isinstance(month, int):
+ raise ValueError("expected an integer for the month")
+ year = int(year)
+ month = int(month)
+ if year < 2004:
+ raise ValueError("expected a year >= 2004")
+ if not month in range(1, 13):
+ raise ValueError("month should be in 1-12")
+
+ # Get the page corresponding to the selected month.
+ #
+ # FIXME: Remove the following :
+ # - `ctl00$MainContent$_navigateur$_ddStructure`: 1061745 (XXX: valeur
+ # principale sélectionnée, probablement ID interne)
+ # - `ctl00$MainContent$_navigateur$_hidCodeStructure`: 119101231 (code
+ # public de la structure), toujours le code de structure lié à
+ # la délégation courante et non celui de la structure.
+
+ path = '/Specialisation/Sgdf/animation/ConsulterCalendrier.aspx'
+ return self.get_page(path, {
+ '__EVENTTARGET': 'ctl00$MainContent$_ddlMois',
+ 'ctl00': {
+ 'ScriptManager1': 'ctl00$_upMainContent|' \
+ 'ctl00$MainContent$_ddlMois',
+ 'MainContent': {
+ '_ddlAnnees': str(year),
+ '_ddlMois': str(month),
+ '_navigateur': {
+ '_ddStructure': '1061745',
+ '_hidCodeStructure': '119101231'}}}},
+ method = self.METHOD_AJAX,
+ hint = 'calendar_month')
+
+
# End of file.