aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST.in10
-rw-r--r--README.rst108
-rw-r--r--docs/.gitignore1
-rw-r--r--docs/Makefile19
-rw-r--r--docs/conf.py173
-rw-r--r--docs/env/index.rst228
-rw-r--r--docs/env/intranet/index.rst37
-rw-r--r--docs/env/login-decouverte.pngbin0 -> 56622 bytes
-rw-r--r--docs/env/login-petitions.pngbin0 -> 16012 bytes
-rw-r--r--docs/env/login-valorise.pngbin0 -> 21606 bytes
-rw-r--r--docs/env/mailids.pngbin0 -> 41231 bytes
-rw-r--r--docs/index.rst19
-rw-r--r--docs/make.bat35
-rw-r--r--docs/usage/index.rst23
-rwxr-xr-xsgdfi/__init__.py7
-rwxr-xr-xsgdfi/__main__.py100
-rwxr-xr-xsgdfi/_db.py41
-rw-r--r--sgdfi/_decode.py1129
-rwxr-xr-xsgdfi/_intranet.py (renamed from sgdfi/_session.py)13
-rwxr-xr-xsgdfi/_manager.py1287
20 files changed, 1958 insertions, 1272 deletions
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..c0eb105
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,10 @@
+include README.rst
+include LICENSE.txt
+include MANIFEST.in
+include setup.py
+include setup.cfg
+
+include docs/*.rst
+include docs/conf.py
+include docs/Makefile
+include docs/make.bat
diff --git a/README.rst b/README.rst
index 14daa6e..4c16b8d 100644
--- a/README.rst
+++ b/README.rst
@@ -3,58 +3,54 @@ SGDFi : interact with SGDF's (Scouts et Guides de France) intranet
`Scouts et Guides de France <SGDF_>`_ is a Scouting and Guiding organization
based in France, member of the `Scoutisme Français`_. For managing its
-assets and operations, it uses an intranet based on `Intr@ssoc`_ and
-managed by `Supralog`_, which is a web application with no easy
-machine access.
-
-SGDFi is a Python 3.x which aims at bringing a machine access to this web
-application, allowing read and write access to what is managed by the intranet
-through a given adherent's account by imitating a web browser (web scraping,
-form completing, ajax simulating, and so on).
-
-Using SGDFi
------------
-
-SGDFi separate four aspects for interacting with the intranet, every one
-depending on the last ones:
-
-- **representation.** SGDFi provides objects and enumerations to represent
- intranet objects, with value validation.
-- **storing.** SGDFi provides a database which is able to manage the knowledge
- we have of the intranet by adding, completing and searching through
- objects.
-- **decoding.** SGDFi provides a manager which decodes various files (HTML
- pages and fragments, AJAX responses, XLS, XML and JSON documents, …),
- extracts valuable information from them and feeds a database to link
- the just extracted information with previously extracted information (e.g.
- structure hierarchy and structure information).
-- **interacting.** SGDFi provides a session which send the commands, and
- gather the raw documents given to a manager.
-
-The interface itself is still under heavy development.
-
-More on the intranet
---------------------
-
-The `Intr@ssoc`_ base is owned and managed by `Supralog`_, and adapted for
-each one of their clients. SGDF is not the only french scouting organization
-to use it, the `Éclaireuses et Éclaireurs de France <EEDF_>`_ use it as well.
-
-`SGDF`_'s intranet is available at `intranet.sgdf.fr`_, although a test one
-is also available at `intranet-qualification.sgdf.fr`_. It is programmed in
-``ASP.NET``, and runs on Microsoft IIS (which is tipped off by the ``Server``
-and ``X-Powered-By`` HTTP headers, and the ``.aspx`` extensions visible in
-the URLs).
-
-Information is gathered several ways:
-
-- through ``GET``-ting the page.
-- through ``POST``-ing the page while gathering fields on the ``GET`` version
- (including a CSRF token).
-- through gathering a `Microsoft Ajax`_ page fragment.
-
-For more information, I'm writing a wiki besides which might be available
-some time.
+assets and operations, and to provide tools to its members, it maintains a
+digital environment which only offers human access, and thus no easy
+machine access.
+
+SGDFi is a Python 3.x library aiming at bringing machine access to this digital
+environment, allowing read and write access to what is managed by it,
+mostly through a given adherent's account by imitating a web browser
+(web scraping, form completing, ajax simulating, and so on).
+
+For more information about how this works and how to make it work,
+consult the documentation. (link pending)
+
+Examples
+--------
+
+These examples **CAN'T** be used yet, as they're just projections of what
+I would like as an interface.
+
+.. code-block:: python
+
+ from datetime import datetime
+ from sgdfi import Manager, ALL
+
+ start = datetime(2018, 10, 1)
+ end = datetime(2018, 10, 20)
+
+ mgr = Manager()
+ mgr.login("<code>", "<password>")
+ mgr.login("<code2>", "<password>")
+
+ print("== Place")
+ print(mgr.places["<place iid>"])
+ print()
+
+ print("== Events in a structure:")
+ for event in mgr.events["<structure iid>"][start:end]:
+ print(event)
+ print()
+
+ print("== All accessible events:")
+ for event in mgr.events[ALL][start:end]:
+ print(event)
+ print()
+
+ print("== Operations on adherent:")
+ for op in mgr.adherents["<adherent iid>"].operations:
+ print(op)
+ print()
What is left to do
------------------
@@ -64,6 +60,7 @@ There are plenty of things to do:
- use MIME types instead of random types for ``Manager.feed()``, and check
what we can send to ``Manager.feed()`` instead of text to also manage
binary file formats such as XLS.
+- maybe delegate?
- translate everything from the SDY project.
- isolate the database?
- manage tips on adherents through operations, e.g. only some purple shirts
@@ -75,11 +72,4 @@ There are plenty of things to do:
- document the interface when stable?
.. _SGDF: https://www.sgdf.fr/
-.. _EEDF: https://www.eedf.fr/
.. _Scoutisme Français: https://www.scoutisme-francais.fr/
-.. _Intr@ssoc: http://www.intrassoc.com/
-.. _Supralog: http://www.supralog.com
-.. _intranet.sgdf.fr: https://intranet.sgdf.fr/
-.. _intranet-qualification.sgdf.fr: http://intranet-qualification.sgdf.fr/
-
-.. _Microsoft Ajax: https://msdn.microsoft.com/fr-fr/library/ee341002(v=vs.100).aspx
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..e35d885
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1 @@
+_build
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..298ea9e
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,19 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..f158b1b
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+#
+# Configuration file for the Sphinx documentation builder.
+#
+# This file does only contain a selection of the most common options. For a
+# full list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'SGDFi'
+copyright = '2018, Thomas Touhey'
+author = 'Thomas Touhey'
+
+# The short X.Y version
+version = ''
+# The full version, including alpha/beta/rc tags
+release = ''
+
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = None
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'alabaster'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# The default sidebars (for documents that don't match any pattern) are
+# defined by theme itself. Builtin themes are using these templates by
+# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
+# 'searchbox.html']``.
+#
+# html_sidebars = {}
+
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'SGDFidoc'
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'SGDFi.tex', 'SGDFi Documentation',
+ 'Thomas Touhey', 'manual'),
+]
+
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'sgdfi', 'SGDFi Documentation',
+ [author], 1)
+]
+
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'SGDFi', 'SGDFi Documentation',
+ author, 'SGDFi', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+
+# -- Options for Epub output -------------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = project
+
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#
+# epub_identifier = ''
+
+# A unique identification for the text.
+#
+# epub_uid = ''
+
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ['search.html'] \ No newline at end of file
diff --git a/docs/env/index.rst b/docs/env/index.rst
new file mode 100644
index 0000000..78710e8
--- /dev/null
+++ b/docs/env/index.rst
@@ -0,0 +1,228 @@
+Description of SGDF's digital environment
+=========================================
+
+This is a test at referencing the interesting components of SGDF's digital
+environment. Keep in mind that it has been put together by observing
+only its public aspect.
+
+Some of the websites are documented in the `public wiki`_ of the SGDF
+group on `Gitlab`_. This wiki is mostly obsolete (but updated versions
+are private).
+
+Collaborations
+--------------
+
+As the organization collaborates with many companies, its digital environment
+is diversified. These companies include:
+
+- `Oxalide`_, which hosts a lot of websites for them; see the `VagrantBox
+ <https://gitlab.com/sgdf/public-files/wikis/VagrantBox>`_ page in the
+ public wiki available on the organization's `Gitlab`_ group.
+- `hob-france`_ which has migrated the main website.
+- `DonkeyCode`_, which makes `<https://petitions.sgdf.fr/>`_.
+- `PixelleProD`_, which makes `<https://jambville.sgdf.fr/>`_.
+- `Supralog`_, which makes and hosts the intranet.
+
+There are also companies affiliated or linked with the organization:
+
+- Scoutik, which is affiliated to the organization, and manages
+ `La Boutique du Scoutisme`_.
+- The `Presses d'Île-de-France`_, the publishing company for SGDF's books
+ (and possibly zines too?).
+
+Showcase websites
+-----------------
+
+Amongst the showcase and news websites of the organization are:
+
+- `<https://www.sgdf.fr/>`_: the main website, made with `Joomla!`_.
+- `<https://magazines.sgdf.fr/>`_: the website showcasing the magazines
+ edited by the organization.
+- `<https://photos.sgdf.fr>`_: a photo collection for internal use, but
+ for which rights and high-quality versions can be requested by journalists
+ to the organization.
+- `<http://phototheque.sgdf.fr/>`_: an older version of the website above.
+- `<http://decouverte.sgdf.fr/>`_: a website presenting the current season's
+ discovery week-ends and allowing to register to one of them, made with
+ `Plone`_.
+- `<http://biblio.sgdf.fr/>`_ : the Centre National's online catalog, made
+ with `Koha`_.
+- `<https://jambville.sgdf.fr/>`_: the online website for Jambville and
+ Scoutik, made with `Joomla!`_.
+- `<https://valorise-toi.sgdf.fr/>`_: the “Valorise-toi” tool, which allows
+ you to highlight the skills you have acquired, are acquiring or wish to
+ acquire through your experience as a scout leader, for when you're looking
+ for a job for example.
+- `<https://formation.sgdf.fr/>`_: a training course lookup tool, easier to
+ use than the one on the intranet (although you still need the intranet
+ to register).
+- `<https://peuplade.sgdf.fr/>`_: the official website for the
+ “louveteaux-jeanettes” branch (8 to 11 y.o.).
+- `<https://tribu.sgdf.fr/>`_: the official website for the
+ “scouts-guides” branch (11 to 14 y.o.).
+- `<https://caravane.sgdf.fr/>`_: the official website for the
+ “pionniers-caravelles” branch (14 to 17 y.o.).
+- `<https://compagnons.sgdf.fr/>`_: the official website for the
+ “compagnons” branch (17 to 19~20 y.o.).
+
+The `main website <https://www.sgdf.fr/>`_ also contains a few tools:
+
+- `Doc en Stock <https://www.sgdf.fr/vos-ressources/doc-en-stock>`_, which
+ provides some useful and sorted documentation, built with `Phoca Download`_,
+ a `Joomla!`_ plugin.
+
+The Intranet
+------------
+
+Most management actions linked to the organization goes through the intranet,
+available at `<https://intranet.sgdf.fr/>`_. It is edited, maintained and
+hosted by `Supralog`_ since circa 2006. A test instance of this intranet
+is available at `<http://intranet-qualification.sgdf.fr/>`_.
+
+Once the registration process for a person in charge is complete, this person
+will receive an e-mail from `adherents@sgdf.fr <mailto:adherents@sgdf.fr>`_
+with their adherent code, which serves as a login name, and a default
+password:
+
+.. image:: mailids.png
+
+These credentials will be useful for logging into the intranet, but also
+other websites, including:
+
+- `<http://decouverte.sgdf.fr/>`_:
+
+ .. image:: login-decouverte.png
+- `<https://petitions.sgdf.fr/>`_:
+
+ .. image:: login-petitions.png
+- `<https://valorise-toi.sgdf.fr/>`_:
+
+ .. image:: login-valorise.png
+
+The intranet allows one to manage, amongst other things:
+
+- registrations and adherents.
+- functions within the organization (nominations, …).
+- structures (groups, territories, …).
+- estate, places.
+- events (meetings, work meetings, camps, rallies, …).
+- accountancy (spendings, donations, …).
+- training courses.
+- accidents and insurance claims (“sinistres”).
+
+For more information, please consult the following pages:
+
+.. toctree::
+ :maxdepth: 2
+
+ intranet/index
+
+Unimplemented
+-------------
+
+Some domains lead to nothing. They could be works in progress or old
+websites for which the domain still exists: the answer usually is either
+“Oups ! Page not found ! (Error 404)” while getting the index (also working
+in HTTPS using a TLS certificate having ``*.sgdf.fr`` as the server name),
+or no answer whatsoever.
+
+These domains are the following:
+
+- `<https://api.sgdf.fr/>`_ (default page).
+- `<https://radio.sgdf.fr/>`_ (default page).
+- `<https://www-dev.sgdf.fr/>`_ (hosted at `Oxalide`_, doesn't answer).
+
+Redirects
+---------
+
+Some domains are only there to redirect to other pages, usually to a page
+on the main website. These redirects are the following:
+
+- `<http://scoutsetguides.fr/>`_ and its ``www.`` counterpart (old domain):
+ `<http://www.sgdf.fr/>`_.
+- `<https://extranet.sgdf.fr/>`_:
+ `<https://www.sgdf.fr/espace-chefs-et-cadres>`_.
+- `<https://design.sgdf.fr/>`_:
+ `<https://www.sgdf.fr/actualites-chefs-cadres/toute-l-actualites-chefs-cadres/les-actualites/2417-refonte-identite-visuelle-reponds-a-l-enquete>`_.
+
+Technical domains
+-----------------
+
+Some domains only have been setup for technical reasons. Amongst them are:
+
+- `<https://dev.sgdf.fr:8443/>`_: answers with a self-signed certificate
+ and redirects to the home page of a Juniper SRX220H2 router, which probably
+ also serves as a firewall.
+- `<https://db.sgdf.fr/phpmyadmin/>`_: asks for credentials and probably
+ leads to a phpmyadmin-like interface.
+- `<https://a.sgdf.fr/>`_: a shortlink platform provided by `bitly`_.
+- `<https://r.sgdf.fr/>`_: a shortlink platform provided by `Mailjet`_.
+- `<https://ws.sgdf.fr/>`_: old website to host some elements such as the
+ script for the cookie bar as documented `here
+ <https://gitlab.com/sgdf/public-files/wikis/eu-cookie-bar-usage>`_ (see
+ `<https://ws.sgdf.fr/cdn/js/cookie-manager.js>`_).
+
+Other platforms
+---------------
+
+Amongst the other websites more or less intern to the organization, one
+can find:
+
+- `<https://sites.sgdf.fr/>`_: a website platform for groups and territories.
+- `<https://blogs.sgdf.fr/>`_: the older version of the above, which will
+ be shut down by 2019.
+- `<https://salaries.sgdf.fr/>`_: an intranet for the organization's employees,
+ runs on `SPIP`_.
+- `<http://frais.sgdf.fr/>`_: an expenses management platform (?), running
+ on `Notys`_.
+- `<https://comptaweb.sgdf.fr/>`_: an accounting website (probably built
+ with `Symfony`_ and `FOSUserBundle`_).
+- `<https://compta.sgdf.fr/>`_: an accounting consultation website
+ (probably built with `Symfony`_ and `FOSUserBundle`_, identified as
+ ``sgdfcomptabilite``), more recent than the above (uses CSRF tokens when
+ the other one doesn't).
+- `<https://don.sgdf.fr/>`_: donation platform.
+- `<https://support.sgdf.fr/>`_: ticket platform (IT support).
+- `<http://e-learning.sgdf.fr/>`_: e-learning platform for people in charge
+ (built with `Moodle`_).
+- `<https://mesdocuments.sgdf.fr/>`_: digital documents platform (?).
+- `<https://petitions.sgdf.fr/>`_: platform for proposing and signing
+ resolutions for the general meetings (probably built with `Symfony`_ and
+ `FOSUserBundle`_).
+- `<https://campagnederentree.sgdf.fr/>`_: ordering platform for
+ communication products for the current season's start.
+- `<https://collaboratif.sgdf.fr/>`_: collaborative platform built
+ with `Alfresco`_.
+- `<https://trouvetoncamp.fr/>`_: a search tool for camps, intended for
+ scout leaders and other team members.
+- `<https://www.laboutiqueduscoutisme.com/>`_: the official online shop for
+ the organization, managed by Scoutik.
+- `<https://gitlab.com/sgdf>`_: the official page of the organization on
+ `Gitlab`_.
+
+.. _Oxalide: https://www.oxalide.com/
+.. _hob-france: https://www.hob-fr.com/
+.. _DonkeyCode: https://www.donkeycode.com/
+.. _PixelleProD: http://www.pixelleprod.com/
+.. _Supralog: http://www.supralog.com
+.. _bitly: https://bitly.com/
+.. _Mailjet: https://www.mailjet.com/
+
+.. _La Boutique du Scoutisme: https://www.laboutiqueduscoutisme.com/
+.. _Presses d'Île-de-France: http://www.presses-idf.fr/
+
+.. _Gitlab: https://about.gitlab.com/
+
+.. _public wiki: https://gitlab.com/sgdf/public-files/wikis/home
+
+.. _WordPress: https://wordpress.org/
+.. _Plone: https://plone.org/
+.. _Koha: https://koha-community.org/
+.. _Joomla!: http://www.joomla.org/
+.. _Phoca Download: https://www.phoca.cz/phocadownload
+.. _SPIP: https://www.spip.net/fr_rubrique91.html
+.. _Notys: http://www.notys.fr/
+.. _Moodle: https://moodle.org/
+.. _Alfresco: https://fr.wikipedia.org/wiki/Alfresco
+.. _Symfony: https://symfony.com/
+.. _FOSUserBundle: https://github.com/FriendsOfSymfony/FOSUserBundle/
diff --git a/docs/env/intranet/index.rst b/docs/env/intranet/index.rst
new file mode 100644
index 0000000..7200d36
--- /dev/null
+++ b/docs/env/intranet/index.rst
@@ -0,0 +1,37 @@
+The intranet
+============
+
+For historical details and context, consult `SGDF's digital environment
+description <../index.html#the-intranet>`_.
+
+The intranet is available through `<https://intranet.sgdf.fr/>`_ (and a
+test instance is available through
+`<http://intranet-qualification.sgdf.fr/>`_). It is an `Intr@ssoc`_ derivative,
+which is made in `ASP.NET`_ (that's tipped off by the ``X-Powered-By`` header
+and the ``.aspx`` extensions in the URL) and runs on a webserver that
+identifies itself as `Microsoft IIS`_ 7.5.
+
+SGDF is not the only French scouting organization to use it; the
+`Éclaireuses et Éclaireurs de France <EEDF_>`_ also do.
+
+There are three kinds of interaction with the intranet:
+
+- through ``GET``-ting a page.
+- through ``POST``-ing a page while gathering fields on the ``GET`` version
+ (including a CSRF token).
+- through gathering a `Microsoft Ajax`_ page fragment.
+
+Returned documents are usually one of these:
+
+- ``text/html``: an HTML document or fragment.
+- ``text/csv``: a CSV document.
+- ``text/xml``: an XML document.
+- ``application/x-microsoft-ajax`` (sent as ``text/plain``): a
+ `Microsoft Ajax`_ response.
+- ``application/json``: a JSON document.
+
+.. _Intr@ssoc: http://www.intrassoc.com/
+.. _ASP.NET: https://www.asp.net/
+.. _Microsoft IIS: https://www.iis.net/
+.. _Microsoft Ajax: https://msdn.microsoft.com/fr-fr/library/ee341002(v=vs.100).aspx
+.. _EEDF: https://www.eedf.fr/
diff --git a/docs/env/login-decouverte.png b/docs/env/login-decouverte.png
new file mode 100644
index 0000000..80f605c
--- /dev/null
+++ b/docs/env/login-decouverte.png
Binary files differ
diff --git a/docs/env/login-petitions.png b/docs/env/login-petitions.png
new file mode 100644
index 0000000..bb537b4
--- /dev/null
+++ b/docs/env/login-petitions.png
Binary files differ
diff --git a/docs/env/login-valorise.png b/docs/env/login-valorise.png
new file mode 100644
index 0000000..2b7974c
--- /dev/null
+++ b/docs/env/login-valorise.png
Binary files differ
diff --git a/docs/env/mailids.png b/docs/env/mailids.png
new file mode 100644
index 0000000..5da4114
--- /dev/null
+++ b/docs/env/mailids.png
Binary files differ
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..dfa73e7
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,19 @@
+Welcome to SGDFi's documentation!
+=================================
+
+`Scouts et Guides de France <SGDF_>`_ is a Scouting and Guiding organization
+based in France, member of the `Scoutisme Français`_. For managing its
+assets and operations, and providing tools to its members, it provides a
+digital environment including an intranet and several other websites.
+
+The SGDFi project, where the “i” stands for “intranet”, is a project to
+provide machine access to the websites in this digital environment.
+
+.. toctree::
+ :maxdepth: 2
+
+ usage/index
+ env/index
+
+.. _SGDF: https://www.sgdf.fr/
+.. _Scoutisme Français: https://www.scoutisme-francais.fr/
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..27f573b
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/docs/usage/index.rst b/docs/usage/index.rst
new file mode 100644
index 0000000..aee8ac3
--- /dev/null
+++ b/docs/usage/index.rst
@@ -0,0 +1,23 @@
+Using SGDFi
+===========
+
+SGDFi separates five aspects of interacting with the intranet, each one
+depending on the previous ones:
+
+- **representation.** SGDFi provides objects and enumerations to represent
+ intranet objects, with value validation.
+- **storing.** SGDFi provides a database which is able to manage the knowledge
+ we have of the intranet by adding, completing and searching through
+ objects.
+- **decoding.** SGDFi provides a decoder which decodes various files (HTML
+ pages and fragments, AJAX responses, XLS, XML and JSON documents, …),
+ extracts valuable information from them and feeds a database to link
+ the just extracted information with previously extracted information (e.g.
+ structure hierarchy and structure information).
+- **interacting.** SGDFi provides a session which sends the commands and
+  gathers the raw documents given to a manager.
+- **managing.** SGDFi provides a manager which manages sessions, decoders
+ and a database.
+
+The interface is still under heavy development, so it goes undocumented
+for now.
diff --git a/sgdfi/__init__.py b/sgdfi/__init__.py
index aa40b62..db40661 100755
--- a/sgdfi/__init__.py
+++ b/sgdfi/__init__.py
@@ -5,9 +5,10 @@
#******************************************************************************
""" SGDFi allows you to interact with SGDF's intranet. """
-from ._session import Session
from ._version import version
-
-__all__ = ["version", "Session"]
+from ._manager import Manager, ALL
+from ._repr import Structure, Adherent, RallyRegistration, Camp, Place, \
+ Operation, OperationType, Function, Event, StructureType, \
+ StructureStatus, AllocationsRegime
# End of file.
diff --git a/sgdfi/__main__.py b/sgdfi/__main__.py
deleted file mode 100755
index a7cfe80..0000000
--- a/sgdfi/__main__.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env python3
-#******************************************************************************
-# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
-# This file is part of the sgdfi project, which is MIT-licensed.
-#******************************************************************************
-""" Main script for testing things with SGDFi. """
-
-import os.path as _path
-
-def get_session():
- """ Get a logged in session. """
-
- from . import Session as _Session
-
- path = _path.join(_path.dirname(__file__), '..', '..', 'logins.txt')
- user, pw = (x.splitlines()[0] for x in open(path).readlines())
-
- return _Session(user = user, pw = pw, save = True)
-
-def test_session():
- """ Test the session. """
-
- s = get_session()
-
- #ret = s.get_ops('4', "yCbyTmNDHpp8CotDhWoEkQ==")
- #ret = s.get_place('ONCt5ZDoIUZIXlYv9QyGuw==')
- #ret = s.get_structure('oA0pOxhIxVX5eh6fTNvgzg==')
- #ret = s.get_events(2018, 10)
-
- if type(ret) == list:
- for e in ret:
- print(e)
- print()
- else:
- print(ret)
-
-def test_repr():
- """ Test the representations. """
-
- from datetime import datetime as _datetime
- from ._repr import Operation as _Operation, Adherent as _Adherent, \
- Function as _Function
-
- op = _Operation()
- op.time = _datetime(2018, 10, 1, 0, 17, 38)
- op.type = "Individu / Abonnement "
- op.author.name = "LEFEBVRE CAROLE"
- op.fields = "Revue: Revue Louveteau-Jeannette, Type: Gratuit, " \
- "Fin: 31/08/2019, Prix: 0,00€ "
-
- ad = _Adherent()
- ad.iid = "yCbyTmNDHpp8CotDhWoEkQ=="
- ad.name = "TOUHEY THOMAS"
- ad.function = _Function.CHEF_PIONNIER_CARAVELLE
-
- op.related.add(ad)
- print(op)
-
- #print(op.fields['Type'])
-
-def test_save():
- """ Test the saving. """
-
- from ._manager import Manager
-
- man = Manager(save = True)
- man.save("hello world", ext = "html")
-
-def test_load():
- """ Test the loading. """
-
- from ._manager import Manager
-
- man = Manager()
- ret = man.load_dump("2018100620105800-operations.xls")
- #ret = man.load_dump("2018100516460200-calendar_month.txt")
-
- if type(ret) == list:
- for e in ret:
- print(e)
- print()
- else:
- print(ret)
-
-def test_funcs():
- """ Test getting the functions. """
-
- s = get_session()
-
- f = s.get_functions()
- s.export_functions(f)
-
-if __name__ == '__main__':
- #test_repr()
- #test_session()
- #test_save()
- test_load()
- #test_funcs()
-
-# End of file.
diff --git a/sgdfi/_db.py b/sgdfi/_db.py
index d1dc6aa..4d33f4a 100755
--- a/sgdfi/_db.py
+++ b/sgdfi/_db.py
@@ -6,6 +6,10 @@
""" This submodule defines the local database (or data manager) for managing
our knowledge of the intranet. """
+from ._repr import Structure as _Structure, Adherent as _Adherent, \
+ RallyRegistration as _RallyRegistration, Camp as _Camp, \
+ Place as _Place, Operation as _Operation, Event as _Event
+
__all__ = ["Database"]
class Database:
@@ -34,38 +38,13 @@ class Database:
# Feed the database with fetched (and possibly incomplete) data.
# ---
- def add_structure(self, st):
- """ Add a structure. """
-
- # TODO
- pass
-
- def add_adherent(self, ad):
- """ Add an adherent. """
-
- # TODO
- pass
-
- def add_rally_registration(self, rreg):
- """ Add a rally registration. """
-
- # TODO
- pass
-
- def add_camp(self, cp):
- """ Add a camp. """
-
- # TODO
- pass
-
- def add_place(self, pl):
- """ Add a place. """
-
- # TODO
- pass
+ def add(self, obj):
+        """ Add or update an object in the database. """
- def add_op(self, op):
- """ Add an operation. """
+ accepted = (_Structure, _Adherent, _RallyRegistration, _Camp, _Place,
+ _Operation, _Event)
+ if all(not isinstance(obj, x) for x in accepted):
+ raise TypeError("Unaccepted object in the database.")
# TODO
pass
diff --git a/sgdfi/_decode.py b/sgdfi/_decode.py
new file mode 100644
index 0000000..f63eb78
--- /dev/null
+++ b/sgdfi/_decode.py
@@ -0,0 +1,1129 @@
+#!/usr/bin/env python3
+#******************************************************************************
+# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
+# This file is part of the sgdfi project, which is MIT-licensed.
+#******************************************************************************
+""" Main object to decode files and dumps from SGDF's intranet. """
+
from itertools import count as _count
from datetime import datetime as _datetime
from re import finditer as _rfindter, sub as _rreplace, split as _rsplit
from io import IOBase as _IOBase, StringIO as _StringIO, BytesIO as _BytesIO, \
    TextIOWrapper as _TextIOWrapper
from base64 import b64decode as _b64decode
from urllib.parse import urlparse as _urlparse, parse_qs as _parse_qs, \
    unquote as _unquote
from html import unescape as _htmlunescape
from json import loads as _jsonloads
from csv import reader as _csvreader

from bs4 import BeautifulSoup as _BeautifulSoup
from pandas import read_excel as _read_excel

from ._repr import Structure as _Structure, Adherent as _Adherent, \
    Place as _Place, RallyRegistration as _RallyRegistration, Camp as _Camp, \
    Operation as _Operation, Event as _Event, \
    FunctionRawData as _FunctionRawData
+
+__all__ = ["Decoder"]
+
+# ---
+# Utilities.
+# ---
+
+# Pagination object.
+
+class _Pagination:
+ def __init__(self):
+ self.current = 0
+ self.number = 0
+ self.more = False
+
+# As `type` is a local variable in some functions, we define an alias to
+# the real function named `_type`.
+
+_type = type
+
+# Ajax field for when decoding an Ajax response.
+
+class _AjaxField:
+ """ Microsoft Ajax response part. """
+
+ def __init__(self, code, name, value, text):
+ self.__code = str(code)
+ self.__name = str(name)
+ self.__value = str(value)
+ self.__text = str(text)
+
+ def __repr__(self):
+ return f'Field(id = {repr(self.code)}, name = {repr(self.name)}, ' \
+ f'attrib = {repr(self.value)})'
+
+ @property
+ def code(self):
+ """ The code (first field). """
+
+ return self.__code
+
+ @property
+ def name(self):
+ """ The name (second field). """
+
+ return self.__name
+
+ @property
+ def value(self):
+ """ The value (third field). """
+
+ return self.__value
+
+ @property
+ def text(self):
+ """ The text (fourth field). """
+
+ return self.__text
+
+# ---
+# Main class.
+# ---
+
+def _validsym(s):
+ """ Check if is a valid symbol part. """
+
+ allowed_fst = 'abcdefghijklmnopqrstuvwxyz'
+ allowed_end = allowed_fst + '0123456789'
+ allowed_all = allowed_end + '_'
+
+ if s is None:
+ return True
+
+ if s != s.casefold():
+ return False
+ if not len(s) or len(s) > 30:
+ return False
+ if any(x not in allowed_all for x in s):
+ return False
+ if s[0] not in allowed_fst or s[-1] not in allowed_end:
+ return False
+ return True
+
class Decoder:
    """ Return objects from SGDF's intranet.

        Decodes raw documents (HTML pages, Ajax fragments, CSV, XLS,
        XML and JSON dumps) into representation objects through the
        `decode()` method. """

    def __init__(self):
        # Pagination state, updated by listing decoders
        # (see `_decode_html_intranet_operations`).
        self._pgn = _Pagination()

        # Find out the AJAX code parts.
        #
        # For each context (only 'intranet' for now), build a lookup
        # table mapping each `_decode_ajax_<frag>_codes` tuple to its
        # fragment name `<frag>`.  Fragment names are deduced from the
        # `_decode_html_<ctx>_..._fragment` methods of this class:
        # `x[13:-9]` strips the `_decode_html_` prefix (13 chars) and
        # the `_fragment` suffix (9 chars), keeping e.g.
        # `intranet_personlist`.

        self._ajaxcodes = {}
        for ctx in ('intranet',):
            frg = (x[13:-9] for x in dir(self) \
                if x.startswith(f"_decode_html_{ctx}_") \
                and x.endswith("_fragment"))

            self._ajaxcodes[ctx] = {getattr(self,
                f"_decode_ajax_{frag}_codes"): frag for frag in frg \
                if hasattr(self, f"_decode_ajax_{frag}_codes")}
+
+ def decode(self, inp, type = 'text/html', hint = None, **kwargs):
+ """ Decode a document and return what's in it.
+
+ `content`: the content as a stream, bytes or text object.
+ `type`: the MIME type.
+ `hint`: the hint, including the context (e.g.
+ `intranet_place`). """
+
+ if hint == 'ignore':
+ return None
+
+ # Sanitize the keyword arguments.
+
+ for kw in ('tree', 'content', 'text', 'entries', 'data'):
+ try:
+ del kwargs[kw]
+ except KeyError:
+ pass
+
+ # Check that the document type is valid.
+
+ if not type in ('text/html', 'text/csv', 'text/xml',
+ 'application/x-microsoft-ajax', 'application/json',
+ 'application/vnd.ms-excel'):
+ raise ValueError(f"Unknown document type: {repr(type)}")
+
+ # Check that the hint is valid.
+
+ if hint is not None and not _validsym(hint):
+ raise ValueError("invalid hint format")
+
+ # FIXME: save?
+ # FIXME: in case of a stream, we want to seek back to where we are.
+
+ if self.__save:
+ self.save_dump(content, hint, type)
+
+ if hint == 'raw':
+ return content
+
+ # FIXME: check if the document is actually an error?
+ # FIXME: get request data somehow, such as iid?
+
+ if type == 'text/html':
+ # An HTML page or fragment, use BeautifulSoup.
+
+ if not hint:
+ raise ValueError(f"Missing HTML hint")
+ try:
+ func = getattr(self, f"_decode_html_{hint}")
+ except AttributeError:
+ msg = f"Unknown HTML hint: {repr(hint)}"
+ raise ValueError(msg) from None
+
+ content = _BeautifulSoup(inp, 'lxml')
+ result = func(content)
+ elif type == 'application/x-microsoft-ajax':
+ # The input might be a stream.
+
+ if isinstance(inp, _RawIOBase):
+ if _type(inp.read(0)) == bytes:
+ inp = _TextIOWrapper(inp)
+
+ content = inp.read()
+ elif _type(inp) == bytes:
+ content = inp.decode('UTF-8')
+ elif _type(inp) == str:
+ content = str(inp)
+ else:
+ raise ValueError("Invalid input type")
+
+ # Answers to Microsoft Ajax calls are a set of elements separated
+ # by pipes ('|'), and must be read four by four as described
+ # in the intranet's Ajax documentation.
+
+ resp = []
+ raw = iter(content.split('|')[:-1])
+ for code in raw:
+ code = int(code)
+ name = next(raw)
+ attrib = next(raw)
+ text = next(raw)
+
+ resp.append(_AjaxField(code, name, attrib, text))
+
+ # The answer's payload usually is in the second entry.
+ # The code may allow to determinate what it corresponds to,
+ # but a lot of codes can be used depending on the actions
+ # leading to the fragment gathering and the context (e.g.
+ # the state of the search form), so we'll try to match one
+ # of the known codes and trust the context for the other cases.
+
+ field = resp[1]
+
+ # If we need a hint, let's find out the hint.
+
+ if hint is None:
+ try:
+ part = self._ajaxcodes[hint][field.code]
+ except KeyError:
+ raise ValueError("ajax hint could not be determined")
+ else:
+ part = hint
+
+ try:
+ assert hint != None
+ func = getattr(self, f"_decode_html_{hint}_fragment")
+ except (AssertionError, AttributeError):
+ try:
+ hint = self._ajaxcodes[hint][field.code]
+ func = getattr(self, f"_decode_html_{hint}_fragment")
+ except (KeyError, AttributeError):
+ msg = "Ajax hint could not be determined"
+ raise ValueError(msg) from None
+
+ return func(field.text)
+ elif type == 'text/csv':
+ # The input might be of different type, we want a text stream.
+
+ if isinstance(inp, _RawIOBase):
+ if _type(inp.read(0)) == bytes:
+ inp = _TextIOWrapper(inp)
+ elif _type(inp) == bytes:
+ inp = _TextIOWrapper(_BytesIO(inp))
+ elif _type(inp) == str:
+ inp = _StringIO(inp)
+ else:
+ raise ValueError("Invalid input type")
+
+ # Try to find out for the hint.
+
+ try:
+ assert hint != None
+ func = getattr(self, f"_decode_csv_{hint}")
+ except (AssertionError, AttributeError):
+ raise ValueError(f"Invalid CSV hint: {hint}") from None
+
+ # Decode the file through the CSV reader, and send it to the
+ # function.
+
+ reader = _csvreader(content, delimiter = ';')
+ resp = [row for row in reader]
+
+ return func(resp)
+ elif type == 'application/vnd.ms-excel': # XLS document.
+ # The input might be of different type, we want a text stream.
+
+ if isinstance(inp, _RawIOBase) and _type(inp.read(0)) == bytes:
+ pass # ok!
+ elif _type(inp) == bytes:
+ inp = _BytesIO(inp)
+ else:
+ raise ValueError("Invalid input type (expected bytes)")
+
+ # Get the hint.
+
+ try:
+ assert hint != None
+ func = getattr(self, f"_decode_xls_{hint}")
+ except (AssertionError, AttributeError):
+ raise ValueError(f"Invalid XLS hint: {hint}") from None
+
+ # Read the entries through pandas and call the function.
+
+ def entries(content):
+ """ Récupération des entrées depuis une dataframe, et
+ extraction en tant qu'itérateur. """
+
+ df = _read_excel(stream)
+
+ for i in _it.count():
+ try:
+ yield {i.replace(".", ""): j \
+ for i, j in dict(df.ix[i]).items()}
+ except KeyError:
+ break
+
+ resp = [e for e in entries(content)]
+ return func(resp)
+ elif type == 'text/xml':
+ # We ought to read the XML document through BeautifulSoup.
+
+ content = _BeautifulSoup(inp, 'lxml')
+
+ # And here we manage this manually.
+ # TODO: there is a more elegant solution.
+
+ if hint == 'intranet_functions':
+ data = _jsonloads(tree.getroot().text)
+ return self.__decode_json_intranet_functions(data)
+ else:
+ raise ValueError("Unknown XML hint: {repr(hint)}")
+ elif type == 'application/json':
+ # The input might be of different type, we want a text stream.
+
+ if isinstance(inp, _RawIOBase):
+ if _type(inp.read(0)) == bytes:
+ inp = _TextIOWrapper(inp)
+ elif _type(inp) == bytes:
+ inp = _TextIOWrapper(_BytesIO(inp))
+ elif _type(inp) == str:
+ inp = _StringIO(inp)
+ else:
+ raise ValueError("Invalid input type")
+
+ # Try to find out for the hint.
+
+ try:
+ assert hint != None
+ func = getattr(self, f"_decode_json_{hint}")
+ except (AssertionError, AttributeError):
+ raise ValueError(f"Invalid JSON hint: {hint}") from None
+
+ # Then load using the standard json module, and send it to
+ # the function.
+
+ data = _jsonloads(content)
+ return func(data)
+ else:
+ raise ValueError(f"unknown type: {repr(type)}")
+
+ # ---
+ # HTML pages decoding.
+ # ---
+
    def _decode_html_intranet_operations(self, tree):
        """ Decode the list of operations (activity log) from a
            BeautifulSoup decoded content.

            Returns a list of `_Operation` objects; also updates the
            internal pagination state `self._pgn` as a side effect. """

        # URL path prefixes used to recognize what kind of object each
        # link in the "related objects" column points to.

        stprefix = '/Specialisation/Sgdf/structures/ResumeStructure.aspx'
        adprefix = '/Specialisation/Sgdf/adherents/ResumeAdherent.aspx'
        irprefix = '/Specialisation/Sgdf/Rassemblements/' \
            'InscriptionRassemblementV2.aspx'
        cpprefix = '/Specialisation/Sgdf/camps/ConsulterModifierCamp.aspx'
        laprefix = '/Specialisation/Sgdf/Commun/ResumeLieuActivite.aspx'

        parent = tree.find(id = 'ctl00_Popup__evenements__gvEvenements')
        if not parent:
            return []

        elts = []

        # Pagination decoding.
        # `numpages`: highest page number shown in the pagination.
        # `more`: are there more pages (is the last page shown as "...")?
        # `curpage`: current page according to the pagination.

        p = parent.find('tr', {'class': ['pagination']})
        if p != None:
            p = p.find('tr')
            td = p.find_all('td')[-1]
            button = next(td.children)

            if button.name == 'span':
                # The last cell is a plain label: it is the current page.
                num = button.text.strip()
                more = False
            else:
                # The last cell is a link: extract the page number from
                # its `Page$N` postback argument.
                num = button['href']
                num = num[num.find("'Page$") + 6:]
                num = num[:num.find("'")]
                more = button.text == '...'

            numpages = int(num)

            # The current page is the only one rendered as a <span>.

            for td in p.find_all('td'):
                child = next(td.children)
                if child.name == 'span':
                    curpage = int(child.text)
                    break
        else:
            # No pagination row: single page.
            curpage = 1
            numpages = 1
            more = False

        self._pgn.current = curpage
        self._pgn.number = numpages
        self._pgn.more = more

        # Events list decoding.

        if not parent.find('tr', attrs = {'class': ['vide']}):
            for elt in parent.find_all('tr', recursive = False):
                # Skip the header row.
                try:
                    assert 'entete' in elt['class']
                    continue
                except:
                    pass
                # Skip the pagination row.
                try:
                    assert 'pagination' in elt['class']
                    continue
                except:
                    pass

                td = elt.find_all('td')
                ch = iter(td)

                # Columns: date, author name, type, related objects,
                # description.

                edate = next(ch).text.strip()
                ename = next(ch).text.strip()
                etype = next(ch).text.strip()
                eobjs = next(ch)
                edesc = next(ch).text.strip()

                # Time decoding, from "DD/MM/YYYY HH:MM:SS".
                # (NB: `min` shadows the builtin in this tight scope.)

                d, t = edate.split()
                day, mon, year = map(int, d.split('/'))
                hour, min, sec = map(int, t.split(':'))
                dt = _datetime(year, mon, day, hour, min, sec)

                # Operation creation.

                op = _Operation()
                op.author.name = ename
                if not op.author.name:
                    # XXX: Bogus event of type `Individu /`… for now we're
                    # ignoring it.

                    continue

                op.time = dt
                op.type = etype
                op.fields = edesc

                # Objects decoding: each link in the "related objects"
                # cell is matched by URL path against the known prefixes
                # and converted into the corresponding representation.

                for link in eobjs.find_all('a'):
                    name = link.text
                    url = _urlparse(link['href'])
                    if url.path == stprefix:
                        st = _Structure()
                        st.iid = _parse_qs(url.query)['id'][0]
                        st.name = name
                        op.related.add(st)
                    elif url.path == adprefix:
                        ad = _Adherent()
                        ad.iid = _parse_qs(url.query)['id'][0]
                        ad.name = name
                        op.related.add(ad)
                    elif url.path == irprefix:
                        ir = _RallyRegistration()
                        ir.iid = _parse_qs(url.query)['id'][0]
                        ir.name = name
                        op.related.add(ir)
                    elif url.path == cpprefix:
                        cp = _Camp()
                        # Note: camps use `IdCamp`, not `id`.
                        cp.iid = _parse_qs(url.query)['IdCamp'][0]
                        cp.name = name
                        op.related.add(cp)
                    elif url.path == laprefix:
                        la = _Place()
                        la.iid = _parse_qs(url.query)['id'][0]
                        la.name = name
                        op.related.add(la)

                elts.append(op)

        return elts
+
+ def _decode_html_intranet_place(self, tree):
+ """ Decode the HTML place from a BeautifulSoup decoded
+ content. """
+
+ parent = tree.find(id = 'ctl00__upMainContent')
+ rp = 'ctl00_MainContent__resume__'
+
+ place = _Place()
+
+ # Informations générales: Libellé.
+
+ lib = parent.find(id = f'{rp}lbLibelle')
+ place.name = lib.text
+
+ # Informations générales: Description.
+
+ desc = parent.find(id = f'{rp}lbDescription')
+ place.description = desc.text
+
+ # Informations générales: Fiche. TODO
+ # XXX: absence de champ… faut voir ce que ça donne quand c'est rempli ?
+
+ # Coordonnées: Adresse (lignes).
+ # Avec plusieurs lignes du type `lbLigne1`, `lbLigne2`, `lbLigne3`.
+ # Certaines lignes peuvent ne pas être présentes, e.g. `lbLigne2`
+ # peut manquer.
+
+ def lines():
+ for i in range(1, 4):
+ try:
+ aid = f'{rp}resumeAdresse__lbLigne{i}'
+ al = parent.find(id = aid)
+ assert al != None
+ except:
+ continue
+ yield al.text.strip()
+
+ place.address = '\n'.join(lines())
+
+ # Coordonnées: Adresse (code postal).
+
+ cp = parent.find(id = f'{rp}resumeAdresse__lbCodePostal')
+ place.postal_code = cp.text
+
+ # Coordonnées: Adresse (nom de la commune).
+
+ vil = parent.find(id = f'{rp}resumeAdresse__lbVille')
+ place.town = vil.text
+
+ # Coordonnées: Adresse (pays).
+
+ pays = parent.find(id = f'{rp}resumeAdresse__lbPays')
+ place.country = pays.text
+
+ # Coordonnées: département administratif.
+ # Au format `XX - Nom du département` (où XX représente le numéro).
+
+ dept = parent.find(id = f'{rp}lbDepartementAdministratif')
+ place.department = dept.text
+
+ # Coordonnées: étranger.
+ # "Oui" ou "Non" selon si le lieu se trouve en France ou non… ?
+
+ etr = parent.find(id = f'{rp}lbEtranger')
+ place.out_of_france = (True, False)[etr.text == 'Non']
+
+ # Coordonnées: continent.
+ # Nom du continent, e.g. « Europe ».
+
+ cont = parent.find(id = f'{rp}lbContinent')
+ place.continent = cont.text
+
+ # Coordonnées: numéro de téléphone.
+ # Numéro de téléphone fixe associé au lieu.
+
+ phone = parent.find(id = f'{rp}lbTelephone')
+ place.phone = phone.text
+
+ # Coordonnées: numéro de fax.
+ # Numéro de fax associé au lieu.
+
+ fax = parent.find(id = f'{rp}lbFax')
+ place.fax = fax.text
+
+ # Coordonnées: adresse de courriel.
+ # Adresse de courriel associée au lieu.
+
+ email = parent.find(id = f'{rp}lbCourriel')
+ place.email = email.text
+
+ # Informations complémentaires: numéro J&S.
+ # XXX: ?? (vide sur les lieux explorés).
+
+ numjs = parent.find(id = f'{rp}lbNumeroJS')
+ place.js = numjs.text
+
+ # Informations complémentaires: hébergement "dur".
+ # "Oui" s'il y a un hébergement en "dur" sur le lieu, "Non" sinon.
+
+ hebd = parent.find(id = f'{rp}lbHebergementDur')
+ place.hardwall = (True, False)[hebd.text == 'Non']
+
+ # Informations complémentaires: numéro de local.
+ # XXX: ?? (vide sur les lieux explorés).
+
+ numloc = parent.find(id = f'{rp}lbNumeroLocal')
+ place.localnumber = numloc.text
+
+ # Informations complémentaires: propriétaire.
+ # Nom du propriétaire, e.g. « Bertrand DUPONT » (format libre).
+
+ prop = parent.find(id = f'{rp}lbProprietaire')
+ place.owner = prop.text
+
+ # Informations complémentaires: adresse du propriétaire.
+ # XXX: ?? (vide sur les lieux explorés).
+
+ addrp = parent.find(id = f'{rp}lbAdresseProprietaire')
+ place.owner_address = addrp.text
+
+ # Accès: numéro de carte IGN.
+ # XXX: ?? (vide sur les lieux explorés).
+
+ ign = parent.find(id = f'{rp}lbNumeroCarteIGN')
+ place.ign_num = ign.text
+
+ # Accès: accès voiture.
+ # "Oui" si le lieu est accessible en voiture, "Non" sinon.
+
+ voit = parent.find(id = f'{rp}lbAccesVoiture')
+ place.car_access = (True, False)[voit.text == 'Non']
+
+ # Accès: distance de la gare la plus proche.
+ # Au format libre (e.g. « 5km »).
+
+ gard = parent.find(id = f'{rp}lbDistanceGare')
+ place.closest_train_station_distance = gard.text
+
+ # Accès: nom de la gare la plus proche.
+ # Au format libre (e.g. « Saint julien du Sault »).
+
+ garn = parent.find(id = f'{rp}lbNomGare')
+ place.closest_train_station = garn.text
+
+ # Accès: distance de l'arrêt de bus le plus proche.
+ # Au format libre.
+
+ busd = parent.find(id = f'{rp}lbDistanceArretBus')
+ place.closest_bus_stop_distance = busd.text
+
+ # Accès: nom de l'arrêt de bus le plus proche.
+ # Au format libre.
+
+ busn = parent.find(id = f'{rp}lbNomArretBus')
+ place.closest_bus_stop = busn.text
+
+ return place
+
+ def _decode_html_intranet_structure_summary(self, tree):
+ """ Decode the HTML summary page for a structure from a
+ BeautifulSoup decoded content. """
+
+ parent = tree.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
+ 'TabContainerResumeStructure__tabResume')
+ rp = 'ctl00_ctl00_MainContent_TabsContent_' \
+ 'TabContainerResumeStructure__tabResume__resume__'
+
+ st = _Structure()
+
+ # Type de structure, parmi :
+ # - « Unité 8-11 ans » : unité louveteaux-jeanettes.
+
+ typ = parent.find(id = f"{rp}lblType")
+ st.type = typ.text
+
+ # Code de la structure.
+
+ code = parent.find(id = f"{rp}lblCodeStructure")
+ st.code = code.text
+
+ # Nom de la structure.
+
+ nom = parent.find(id = f"{rp}lblNom")
+ st.name = nom.text
+
+ # Description de la structure.
+
+ desc = parent.find(id = f"{rp}lblDescription")
+ st.description = desc.text
+
+ # Statut de la structure. TODO
+ # Par exemple, « Ouverte ».
+
+ status = parent.find(id = f"{rp}lblStatut")
+ st.status = status.text
+
+ # Hémisphère de la structure. TODO
+
+ hemis = parent.find(id = f"{rp}lblHemisphere")
+ st.hemisphere = hemis.text
+
+ # Adresse de la structure. TODO
+
+ def lines():
+ for i in range(1, 4):
+ try:
+ aid = f'{rp}resumeAdresse__lbLigne{i}'
+ al = parent.find(id = aid)
+ assert al != None
+ except:
+ continue
+ yield al.text.strip()
+
+ st.address = '\n'.join(lines())
+
+ # Code postal.
+
+ cp = parent.find(id = f"{rp}resumeAdresse__lbCodePostal")
+ st.postal_code = cp.text.strip()
+
+ # Nom de la commune.
+
+ vil = parent.find(id = f"{rp}resumeAdresse__lbVille")
+ st.town = vil.text.strip()
+
+ # Pays de la commune.
+
+ pays = parent.find(id = f"{rp}resumeAdresse__lbPays")
+ st.country = pays.text.strip()
+
+ # Département administratif.
+
+ dept = parent.find(id = f"{rp}lblDepartement")
+ st.department = dept.text.strip()
+
+ # Numéro de téléphone.
+
+ phone = parent.find(id = f"{rp}lblTelephone")
+ st.phone = phone.text.strip()
+
+ # Numéro de fax.
+
+ fax = parent.find(id = f"{rp}lblFax")
+ st.fax = fax.text.strip()
+
+ # Adresse de courriel.
+
+ email = parent.find(id = f"{rp}lblCourrier")
+ st.email = email.text.strip()
+
+ # Site web.
+
+ website = parent.find(id = f"{rp}hlSiteWeb")
+ try:
+ st.website = website['href']
+ except KeyError:
+ pass
+
+ # Date de début d'activité pour la saison courante.
+
+ deb = parent.find(id = f"{rp}lblDateDebutActivite")
+ st.start = deb.text
+
+ # Informations de localisation.
+
+ loc = parent.find(id = f"{rp}lblInfosLocalisation")
+ st.location_info = loc.text
+
+ # Coordonnées GPS. TODO
+
+ lat = parent.find(id = f"{rp}lblCoordonneesGPSLatitude")
+ st.gps_latitude = lat.text
+ lon = parent.find(id = f"{rp}lblCoordonneesGPSLongitude")
+ st.gps_longitude = lon.text
+
+ return st
+
    def _decode_html_intranet_structure_hierarchy(self, tree):
        """ Decode the HTML hierarchy tab for a structure from a
            BeautifulSoup decoded content.

            Incomplete: the parent structure data is extracted below but
            not used yet; this method always returns None for now. """

        parent = tree.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
            'TabContainerResumeStructure__tabHierarche')

        # Parent structure decoding. TODO
        # `pid`: identifier of the parent structure (base64-encoded in
        #        the link's `id` query parameter).
        # `pcode`: code of the parent structure.
        # `pname`: name of the parent structure.

        code = parent.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
            'TabContainerResumeStructure__tabHierarche__gvParents_ctl02_' \
            '_hlStructure')
        pid = _parse_qs(_urlparse(code['href']).query)['id'][0]
        pid = _b64decode(pid)
        pcode = code.text

        # The parent structure name is in the second cell of the
        # parents grid.

        p = parent.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
            'TabContainerResumeStructure__tabHierarche__gvParents')
        pname = p.find_all('td')[1].text

        #self.structures.add(st)
        return None
+
+ # ---
+ # HTML (Ajax) fragments decoding.
+ # ---
+
    # Known Microsoft Ajax payload codes for each fragment kind,
    # gathered empirically.  `__init__` turns these
    # `_decode_ajax_<frag>_codes` attributes into the `_ajaxcodes`
    # lookup table used by `decode()` to guess the fragment decoder
    # when no hint is given.

    _decode_ajax_intranet_personlist_codes = (9192, 9431, 9449, 15819, 17464,
        17721, 20723, 22607, 22864)
    _decode_ajax_intranet_person_summary_codes = (21686,)
    _decode_ajax_intranet_person_family_codes = (7190,)
    _decode_ajax_intranet_calendar_month_codes = (23896, 24180, 24954, 26883,
        26931, 27407, 27933, 28026, 29232, 31576, 33283, 33288, 33438, 38663,
        39664, 40051, 47167)
+
    def _decode_html_intranet_personlist_fragment(self, tree):
        """ Decode a person search results Ajax fragment from a
            BeautifulSoup decoded content, as a list of people. """

        people = []

        parent = tree.find(id = 'ctl00_MainContent__recherche__gvResultats')
        for line in parent.children:
            # Each result row unpacks into (at least) 9 cells; rows that
            # do not (headers, stray strings) are skipped.

            try:
                _, name, adh, fct, st, finfct, cp, vil, \
                    finadh, *_ = line.children
            except:
                continue

            # NOTE(review): `Person` is neither defined nor imported in
            # this module, so this raises NameError at runtime — confirm
            # the intended representation class (possibly `_Adherent`).

            p = Person()

            # Name and internal identifier.

            try:
                url = next(name.children)['href']
                q = _parse_qs(_urlparse(url).query)
                iid = q['id'][0]
            except:
                continue

            nom = next(name.children).text
            props = nom.split()

            p.iid = iid
            p.common_name = props[0]
            p.first_name = props[-1]

            # Adherent code, function, associated structure,
            # membership period. TODO
            #
            # The function end date is formatted DD/MM/YYYY.
            # The membership end date is formatted DD/MM/YYYY.
            # The adherent code is a number only, e.g. "110".
            #
            # If the person is a guest, the membership end date and
            # adherent code fields are empty (but not the function end
            # field, which refers to the end date of the guest status).
            #
            # There can be an adherent code but no membership end date
            # and function, or simply no function end date.

            try:
                code = next(adh.children).text
                fin_adhesion = next(finadh.children).text
            except:
                code = None

            fonction = fct.text
            try:
                finfonction = next(finfct.children).text
            except:
                finfonction = None
            structure = next(st.children).text

            # Town of residence.

            code_postal = next(cp.children).text
            ville = next(vil.children).text

            p.address.municipality.name = ville
            p.address.municipality.postal_code = code_postal

            # Add to the result list.

            people.append(p)

        return people
+
+ def _decode_html_intranet_calendar_month_fragment(self, tree):
+ """ Decode the HTML page for a calendar month from a
+ BeautifulSoup decoded content. """
+
+ parent = tree.find(id = 'ctl00_MainContent__calendrier')
+ events = []
+
+ for jour in parent.find_all(attrs = {'class': ['jourDuMois']}):
+ for div in jour.find_all('div'):
+ try:
+ e_id = div.find_all('a', recursive = False)[1]
+ except:
+ continue
+ e_id = _parse_qs(_urlparse(e_id['href']).query)['id'][0]
+
+ e = Event()
+
+ span = div.find('span')['onmouseover']
+ span = span[span.find("'") + 1:]
+ spanidx = next(_rfinditer(r"($|[^\\])(\\{2})*\'", span)).end()
+ span = span[:spanidx - 1]
+ span = _rreplace(r"($|[^\\])\\*'", r"\1'", span)
+ span = _htmlunescape(span)
+
+ span = span.replace('<br />', '\n')
+ span = span.replace('<hr />', '')
+ span = span.replace('<u>', '')
+ span = span.replace('</u>', '')
+ span = _rsplit(r'</?b>', span)
+
+ # Identifiant de l'intranet.
+
+ e.iid = e_id
+
+ # Libellé de l'évènement.
+
+ e.name = span[1].strip()
+
+ # Description de l'évènement.
+ # Au format multi-lignes.
+
+ desc = span[14].strip()
+ if desc != "Indéterminée":
+ e.desc = desc
+
+ # Type de l'évènement. TODO
+ # Parmi :
+ # - "Sortie Journée"
+ # - "Réunion Demi-Journée"
+ # - "Week-End"
+ # - "Réunion de travail"
+ # - "Camp Année"
+ # - "Camp Eté"
+ # - "Extrajob"
+
+ typ = span[4].strip()
+
+ # Date de début de l'évènement.
+ # Sous le format "JJ/MM/AAAA HH:MM".
+
+ def dateheure(x):
+ jma, hm = x.split()
+ j, M, a = map(int, jma.split('/'))
+ h, m = map(int, hm.split(':'))
+
+ return DateHeure(a, M, j, h, m)
+
+ du = span[6].strip()
+ e.start = dateheure(du)
+
+ # Date de fin de l'évènement.
+ # Sous le format "JJ/MM/AAAA HH:MM".
+
+ au = span[8].strip()
+ e.end = dateheure(au)
+
+ # Lieu de l'évènement.
+ # En une ligne. "Indéterminé" s'il n'y en a pas.
+
+ lieu = span[10].strip()
+ if lieu != "Indéterminé":
+ e.place.name = lieu
+
+ # Structure principale ayant proposé l'évènement. TODO
+ # Au format "CODE - NOM".
+
+ prop = span[12].strip()
+
+ # Dernier utilisateur ayant mis à jour l'évènement. TODO
+ # Nom seulement.
+
+ maj = span[16].strip()
+
+ # Fonctions concernées. TODO
+ # Valeurs possibles :
+ # - "Toutes".
+
+ fct = span[18].strip()
+
+ # Date de rappel de l'évènement. TODO
+ # Valeurs possibles :
+ # - "Aucune"
+ # - ?
+
+ dr = span[20].strip()
+
+ # Visibilité de l'évènement. TODO
+ # Valeurs possibles :
+ # - "Structure seule"
+ # - "Structures dépendantes"
+
+ vis = span[22].strip()
+
+ # Date de dernier envoi des invitations. TODO
+ # Valeurs possibles :
+ # - "Aucun"
+ # - ?
+
+ inv = span[24].strip()
+
+ # Ajout de l'évènement à la liste.
+
+ events.append(e)
+
+ return events
+
+ # ---
+ # XLS documents decoding.
+ # ---
+
+ def _decode_xls_intranet_operations(self, data):
+ """ Decode the XLS documents for operations ("Journal_activites.xls")
+ from a pandas decoded content. """
+
+ print(content)
+ raise NotImplementedError
+
+ # ---
+ # JSON documents decoding.
+ # ---
+
+ def _decode_json_intranet_functions(self, data):
+ """ Decode the JSON functions autocompletion data. """
+
+ def _sfix(code):
+ if code[-1:] == "L" or code[:1] == "3":
+ return "_L"
+ if code[-1:] == "N" or code[:1] == "6":
+ return "_N"
+ if code[-1:] == "T" or code[:1] == "9":
+ return "_T"
+ return ""
+
+ def _cname(cname):
+ """ Make a canonical name out of a name. """
+
+ cfilt = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
+
+ cname = cname.casefold().upper()
+ cname = cname.replace(' - ', ' ')
+ cname = cname.replace(' !', '')
+ cname = cname.replace(' ', ' ')
+ cname = cname.replace("D'", '')
+ cname = cname.replace(' ', '_')
+ cname = cname.replace('É', 'E')
+ cname = cname.replace('À', 'A')
+ cname = ''.join(filter(lambda c: c in cfilt, cname))
+
+ return cname
+
+ # First phase: load from the JSON and store as an array under the
+ # constant name.
+
+ funcs = {}
+
+ for idx, value in enumerate(content):
+ iid = int(value['id'])
+ name = value['name']
+
+ code, rest = name.split('(')
+ code = code.strip()
+ rest = rest.split(')')[0]
+
+ rest = rest.split('/')
+ nm = rest[0].strip()
+ if len(rest) == 1:
+ nf = None
+ else:
+ nf = '/'.join(rest[1:])
+ nf = nf.strip()
+ if nf == nm:
+ nf = None
+
+ cnames = [_cname(nm)]
+ if nf is not None and _cname(nf) != _cname(nm):
+ cnames.append(_cname(nf))
+
+ func = _FunctionRawData(iid, code, nm, nf, cnames, idx + 1)
+
+ for cname in cnames:
+ if not cname in funcs:
+ funcs[cname] = []
+ funcs[cname].append(func)
+
+ # Second phase: disambiguation.
+
+ while True:
+ try:
+ cname = next(cn for cn in funcs.keys() if len(funcs[cn]) > 1)
+ except StopIteration:
+ break
+
+ disf = funcs[cname]
+ del funcs[cname]
+
+ # We ought to find a solution to make other canonical names.
+
+ codes = [func.code for func in disf]
+ sfix = [_sfix(code) for code in codes]
+ if len(sfix) > len(set(sfix)):
+ raise ValueError(f"Could not deduplicate using scale: {codes}")
+
+ for func, suffix in zip(disf, sfix):
+ # Delete all the current function references.
+
+ for cname in func.cnames:
+ try:
+ f = funcs[cname]
+ except KeyError:
+ continue
+
+ f.remove(func)
+ if not f:
+ del funcs[cname]
+
+ # Replace all of the cnames.
+
+ func = func.add_cname_suffix(suffix)
+
+ # Add all of the references again.
+
+ for cname in func.cnames:
+ if not cname in funcs:
+ funcs[cname] = []
+ funcs[cname].append(func)
+
+ # Third phase: flatten the functions and we're done!
+
+ funcs = list(set(func[0] for func in funcs.values()))
+ funcs.sort(key = lambda x: x.srcorder)
+
+ return funcs
+
+# End of file.
diff --git a/sgdfi/_session.py b/sgdfi/_intranet.py
index 5a53783..e349bd3 100755
--- a/sgdfi/_session.py
+++ b/sgdfi/_intranet.py
@@ -16,9 +16,10 @@ from json import dumps as _jsondumps
from requests import Session as _Session
from bs4 import BeautifulSoup as _BeautifulSoup
-from ._manager import Manager as _Manager
from ._repr import IID as _IID
+__all__ = ["IntranetSession", "IntranetDecoder"]
+
_monotime = lambda: _getclocktime(_MONOCLOCK)
class RedirectError(Exception):
@@ -38,13 +39,12 @@ class RedirectError(Exception):
# Définition de l'objet principal.
# ---
-class Session(_Manager):
+class IntranetSession:
""" Class for interacting with the intranet. """
- def __init__(self, base = 'https://intranet.sgdf.fr',
- user = None, pw = None, *args, **kwargs):
- super().__init__(*args, **kwargs)
-
+ def __init__(self, manager, base = 'https://intranet.sgdf.fr',
+ user = None, pw = None):
+ self.__mgr = manager
self.__base = base
self.__user = user
self.__pw = pw
@@ -426,5 +426,4 @@ class Session(_Manager):
method = self.METHOD_AJAX,
hint = 'calendar_month')
-
# End of file.
diff --git a/sgdfi/_manager.py b/sgdfi/_manager.py
index 1d23c5c..67efe05 100755
--- a/sgdfi/_manager.py
+++ b/sgdfi/_manager.py
@@ -3,168 +3,185 @@
# Copyright (C) 2018 Thomas "Cakeisalie5" Touhey <thomas@touhey.fr>
# This file is part of the sgdfi project, which is MIT-licensed.
#******************************************************************************
-""" Definition of the main object to decode and manage objects of the
- SGDF's intranet, without the network part which is managed by the
- `Session` object, which inherits from this class to manage its
- received data. """
+""" Definition of the main object to decode and manage representations, and
+ manage sessions. """
import os.path as _path
from os import makedirs as _makedirs, open as _open, fdopen as _fdopen, \
O_WRONLY as _O_WRONLY, O_CREAT as _O_CREAT, O_EXCL as _O_EXCL
from sys import stdout as _stdout
-from itertools import count as _count
-from datetime import datetime as _datetime
-from re import finditer as _rfindter, sub as _rreplace, split as _rsplit
-from io import IOBase as _IOBase
-from base64 import b64decode as _b64decode
-from urllib.parse import urlparse as _urlparse, parse_qs as _parse_qs, \
- unquote as _unquote
-from html import unescape as _htmlunescape
-from xml.etree.ElementTree import ElementTree as _XMLTree, \
- fromstring as _XMLFromString
-from json import loads as _jsonloads
-from csv import reader as _csvreader
-
-from bs4 import BeautifulSoup as _BeautifulSoup
-from pandas import read_excel as _read_excel
+from shutil import copyfile as _copyfile
+from datetime import datetime as _datetime, date as _date
+
from appdirs import user_cache_dir as _user_cache_dir
from ._repr import Structure as _Structure, Adherent as _Adherent, \
Place as _Place, RallyRegistration as _RallyRegistration, Camp as _Camp, \
Operation as _Operation, FunctionRawData as _FunctionRawData
from ._db import Database as _Database
+from ._intranet import IntranetSession as _Session
+from ._decode import Decoder as _Decoder
-__all__ = ["Manager"]
+__all__ = ["Manager", "ALL"]
-# Internal classes for the `_Manager` class.
+class AllType:
+ pass
-class _Pagination:
- def __init__(self):
- self.current = 0
- self.number = 0
- self.more = False
+ALL = AllType()
-class _AjaxField:
- """ Microsoft Ajax response part. """
+# ---
+# Utilities.
+# ---
- def __init__(self, code, name, value, text):
- self.__code = str(code)
- self.__name = str(name)
- self.__value = str(value)
- self.__text = str(text)
+# As `type` is a local variable in some functions, we define an alias to
+# the real function named `_type`.
- def __repr__(self):
- return f'Field(id = {repr(self.code)}, name = {repr(self.name)}, ' \
- f'attrib = {repr(self.value)})'
+_type = type
- @property
- def code(self):
- """ The code (first field). """
+# Get the date for the public interface.
- return self.__code
+def _dt(value, fromspan = False):
+ """ Get a datetime or datetime span from anything (and validate). """
- @property
- def name(self):
- """ The name (second field). """
+ if isinstance(value, _datetime):
+ return value
+ elif isinstance(value, _date):
+ return _datetime.fromordinal(value.toordinal())
+ else:
+ return _datetime.fromtimestamp(value)
- return self.__name
+# Check if something is a valid symbol, for making valid Python keywords.
- @property
- def value(self):
- """ The value (third field). """
+def _validsym(s):
+ """ Check if is a valid symbol part. """
- return self.__value
+ allowed_fst = 'abcdefghijklmnopqrstuvwxyz'
+ allowed_end = allowed_fst + '0123456789'
+ allowed_all = allowed_end + '_'
- @property
- def text(self):
- """ The text (fourth field). """
+ if s is None:
+ return True
- return self.__text
+ if s != s.casefold():
+ return False
+ if not len(s) or len(s) > 30:
+ return False
+ if any(x not in allowed_all for x in s):
+ return False
+ if s[0] not in allowed_fst or s[-1] not in allowed_end:
+ return False
+ return True
-class _StructureManager:
- """ The structure manager. """
+# ---
+# Management classes.
+# ---
- def __init__(self, main):
- self.__main = main
+class _StructureManager:
+ """ Structure access class. """
- def add(self, st):
- if not isinstance(st, _Structure):
- raise ValueError("Expected a 'Structure'.")
+ def __init__(self, mgr):
+ self.__mgr = mgr
- self.__main.add_structure(st)
+ def __getitem__(self, key):
+ raise NotImplementedError
class _AdherentManager:
- """ The adherent manager. """
+ """ Adherent access class. """
- def __init__(self, main):
- self.__main = main
+ def __init__(self, mgr):
+ self.__mgr = mgr
- def add(self, ad):
- if not isinstance(ad, _Adherent):
- raise ValueError("Expected an 'Adherent'.")
-
- self.__main.add_adherent(ad)
+ def __getitem__(self, key):
+ raise NotImplementedError
class _RallyRegistrationManager:
- """ The rally registration manager. """
-
- def __init__(self, main):
- self.__main = main
+ """ Rally registration access class. """
- def add(self, rreg):
- if not isinstance(rreg, _RallyRegistration):
- raise ValueError("Expected a 'RellyRegistration'.")
+ def __init__(self, mgr):
+ self.__mgr = mgr
- self.__main.add_rally_registration(rreg)
+ def __getitem__(self, key):
+ raise NotImplementedError
class _CampManager:
- """ The camp manager. """
+ """ Camp access class. """
- def __init__(self, main):
- self.__main = main
+ def __init__(self, mgr):
+ self.__mgr = mgr
- def add(self, cp):
- if not isinstance(cp, _Camp):
- raise ValueError("Expected a 'Camp'.")
-
- self.__main.add_camp(cp)
+ def __getitem__(self, key):
+ raise NotImplementedError
class _PlaceManager:
- """ The place manager. """
+ """ Place access class. """
- def __init__(self, main):
- self.__main = main
+ def __init__(self, mgr):
+ self.__mgr = mgr
- def add(self, pl):
- if not isinstance(pl, _Place):
- raise ValueError("Expected a 'Place'.")
+ def __getitem__(self, key):
+ raise NotImplementedError
- self.__main.add_place(pl)
+class _EventStructureManager:
+ """ Structure event access class. """
-class _OperationManager:
- """ The operations manager. """
+ def __init__(self, mgr, st):
+ self.__mgr = mgr
+ self.__st = st
- def __init__(self, main):
- self.__main = main
+ def __getitem__(self, key):
+ if isinstance(key, slice):
+ # The mgr.events[<iid>][datetime(y1, m1, d1):datetime(y2, m2, d2)]
+ # notations was used. The step is not read.
- def add(self, op):
- if not isinstance(op, _Operation):
- raise ValueError("Expected an 'Operation'.")
+ start = _dt(key.start) if key.start is not None else None
+ stop = _dt(key.stop) if key.stop is not None else None
- self.__main.add_op(op)
+ if start is not None and stop is not None and start > stop:
+ return []
-# The main class.
+ raise NotImplementedError
+ elif isinstance(key, _date):
+ # We ought to get the events corresponding to a date.
-_type = type
+ raise NotImplementedError
+
+ raise KeyError("Should only use slice or date key.")
+
+class _EventManager:
+ """ Event access class. """
-class Manager:
+ def __init__(self, mgr):
+ self.__mgr = mgr
+
+ def __getitem__(self, key):
+ if key is ALL:
+ return _EventStructureManager(self.__mgr, ALL)
+
+ raise NotImplementedError
+
+class _OperationManager:
+ """ Operation access class. """
+
+ def __init__(self, mgr):
+ self.__mgr = mgr
+
+ def __getitem__(self, key):
+ raise NotImplementedError
+
+# ---
+# The main class.
+# ---
+
+class Manager(_Decoder):
""" Manage objects from SGDF's intranet. """
def __init__(self, save = False, folder = None):
- self._pgn = _Pagination()
- self.__save = save
- self.__folder = folder
+ super().__init__()
+
+ self.__save = save
+ self.__folder = folder
+ self.__sessions = []
# Work out the folder, and make sure it exists.
@@ -180,12 +197,13 @@ class Manager:
self.__db = _Database(cache_dir = cache_dir)
- self.__sts = _StructureManager(self.__db)
- self.__ads = _AdherentManager(self.__db)
- self.__rrs = _RallyRegistrationManager(self.__db)
- self.__cps = _CampManager(self.__db)
- self.__pls = _PlaceManager(self.__db)
- self.__ops = _OperationManager(self.__db)
+ self.__sts = _StructureManager(self)
+ self.__ads = _AdherentManager(self)
+ self.__rrs = _RallyRegistrationManager(self)
+ self.__cps = _CampManager(self)
+ self.__pls = _PlaceManager(self)
+ self.__evs = _EventManager(self)
+ self.__ops = _OperationManager(self)
@property
def structures(self):
@@ -218,1005 +236,130 @@ class Manager:
return self.__pls
@property
+ def events(self):
+ """ The events manager. """
+
+ return self.__evs
+
+ @property
def operations(self):
""" The operations manager. """
return self.__ops
# ---
- # Save file management.
+ # Dump management.
# ---
- def load_dump(self, name):
- """ Read from a saved dump. """
-
- path = _path.join(self.__folder, name)
-
- name, *ext = name.split('.')
- ext = '.'.join(ext)
- name, *hint = name.split('-')
- hint = '-'.join(hint)
- if not hint:
- hint = None
-
- if ext == 'html':
- type = 'html'
- elif ext == 'txt':
- type = 'ajax'
- elif ext == 'csv':
- type = 'csv'
- elif ext == 'xls':
- type = 'xls'
- elif ext == 'xml':
- type = 'xml'
- elif ext == 'json':
- type = 'json'
- else:
- type = None
-
- return self.read(path, type, hint)
-
- def save_dump(self, content, hint = None, type = 'html'):
- """ Save a file. """
-
- exts = {'html': 'html', 'csv': 'csv', 'xls': 'xls', 'xml': 'xml',
- 'json': 'json'}
- ext = exts.get(type, 'txt')
-
- fdmode = 'w'
- if _type(hint) == str:
- hint = f"-{hint}"
- else:
- hint = ""
-
- srt = f"{_datetime.now().strftime('%Y%m%d%H%M%S')}"
- end = f"{hint}.{ext}"
-
- for idx in range(100):
- try:
- fd = _open(_path.join(self.__folder, f"{srt}{idx:02d}{end}"),
- _O_WRONLY | _O_CREAT | _O_EXCL)
- except FileExistsError:
- continue
-
- f = _fdopen(fd, fdmode)
- break
- else:
- raise ValueError("Could not find a suitable index…")
+ _guessfile = {
+ 'html': 'text/html',
+ 'csv': 'text/csv',
+ 'xml': 'text/xml',
+ 'json': 'application/json',
+ 'xls': 'application/vnd.ms-excel'}
- f.write(content)
+ def load_file(self, path, type = None, **kwargs):
+ """ Read from a file without headers. """
- # ---
- # Decoding/feeding part.
- # ---
-
- def read(self, path, type, hint = None):
- """ Read from any file. """
-
- return self.feed(open(path).read(), type, hint)
-
- _AjaxFieldHintCodes = {
- 'personlist': (9192, 9431, 9449, 15819, 17464, 17721, 20723,
- 22607, 22864),
- 'person_summary': (21686,),
- 'person_family': (7190,)
- }
- _AjaxFieldCodeHints = {code: hint \
- for hint, codes in _AjaxFieldHintCodes.items() for code in codes}
-
- def feed(self, content, type, hint = None):
- """ Feed the manager with a document.
-
- `content`: the raw content as UTF-8 encoded text.
- `type`: the type amongst "html", "ajax", "csv", "xls".
- `hint`: the hint. """
-
- if hint == 'ignore':
- return None
-
- if self.__save:
- self.save_dump(content, hint, type)
- if hint == 'raw':
- return content
-
- # FIXME: vérifier s'il y a eu une erreur.
-
- if type == 'html':
- if hint == 'operations':
- func = self.__feed_html_operations
- elif hint == 'place':
- func = self.__feed_html_place
- elif hint == 'personlist_fragment':
- func = self.__feed_html_personlist_fragment
- elif hint == 'person_summary_fragment':
- func = self.__feed_html_person_summary_fragment
- elif hint == 'person_family_fragment':
- func = self.__feed_html_person_family_fragment
- elif hint == 'structure_summary':
- func = self.__feed_html_structure_summary
- elif hint == 'structure_hierarchy':
- func = self.__feed_html_structure_hierarchy
- elif hint == 'calendar_month_fragment':
- func = self.__feed_html_calendar_month_fragment
- else:
- raise ValueError(f"unknown html hint: {repr(hint)}")
-
- content = _BeautifulSoup(content, 'lxml')
- return func(content)
- elif type == 'ajax':
- # Les réponses aux appels AJAX sont un ensemble d'éléments séparés
- # par des pipes ('|'), à prendre par groupe de quatre tels que
- # décrits par le document `intranet.rst`.
-
- resp = []
- raw = iter(content.split('|')[:-1])
- for code in raw:
- code = int(code)
- name = next(raw)
- attrib = next(raw)
- text = next(raw)
-
- resp.append(_AjaxField(code, name, attrib, text))
-
- # La charge utile de la réponse est dans la seconde entrée
- # généralement. Le code permet de déterminer de quoi il en
- # retourne, mais beaucoup de codes peuvent être émis selon les
- # actions précédentes et le contexte (e.g. état du formulaire
- # de recherche), donc on fait confiance au contexte tout en
- # proposant un contexte par défaut selon le code.
-
- field = resp[1]
-
- # On vérifie l'indice donné par l'utilisateur, s'il y en a un.
-
- if hint is None:
- try:
- hint = _AjaxFieldCodeHints[field.code]
- except KeyError:
- raise ValueError("ajax hint could not be determined")
-
- if hint == 'personlist':
- func = self.__feed_html_personlist_fragment
- elif hint == 'person_summary':
- func = self.__feed_html_person_summary_fragment
- elif hint == 'person_family':
- func = self.__feed_html_person_family_fragment
- elif hint == 'calendar_month':
- func = self.__feed_html_calendar_month_fragment
- else:
- raise ValueError(f"unknown ajax hint: {repr(hint)}")
-
- return func(field.text)
- elif type == 'csv':
- if hint == 'attend':
- func = self.__feed_csv_attend
- else:
- raise ValueError(f"unknown csv hint: {repr(hint)}")
-
- if not isinstance(content, _IOBase):
- stream = _StringIO(content)
- reader = _csvreader(content, delimiter = ';')
- resp = [row for row in reader]
-
- return func(resp)
- elif type == 'xls':
- if hint == 'people':
- func = self.__feed_xls_people
- elif hint == 'operations':
- func = self.__feed_xls_operations
+ if type is None:
+ for ext, mime in self._guessfile.items():
+ if path.endswith(f".{ext}"):
+ type = mime
+ break
else:
- raise ValueError(f"unknown xls hint: {repr(hint)}")
+ raise ValueError("Could not guess the file type.")
- def entries(content):
- """ Récupération des entrées depuis une dataframe, et
- extraction en tant qu'itérateur. """
+ return self.feed(open(path, "rb"), type = type, **kwargs)
- df = _read_excel(stream)
+ def load_dump(self, time, id = 0):
+ """ Read from a saved dump (with headers). """
- for i in _it.count():
- try:
- yield {i.replace(".", ""): j \
- for i, j in dict(df.ix[i]).items()}
- except KeyError:
- break
-
- resp = [e for e in entries(content)]
- return func(resp)
- elif type == 'xml':
- tree = _XMLTree(_XMLFromString(content))
-
- if hint == 'functions':
- data = _jsonloads(tree.getroot().text)
- return self.__feed_json_functions(data)
- else:
- raise ValueError("unknown xml hint: {repr(hint)}")
- elif type == 'json':
- data = _jsonloads(content)
-
- if hint == 'functions':
- return self.__feed_json_functions(data)
- else:
- raise ValueError("unknown json hint: {repr(hint)}")
+ if isinstance(time, _datetime):
+ time = time.timestamp()
+ elif isinstance(time, _date):
+ time = _datetime.fromordinal(time.toordinal()).timestamp()
else:
- raise ValueError(f"unknown type: {repr(type)}")
-
- # ---
- # HTML pages and fragments decoding.
- # ---
-
- def __feed_html_operations(self, content):
- """ Decode the HTML operations from a BeautifulSoup decoded
- content and feed it into the manager's operations. """
-
- stprefix = '/Specialisation/Sgdf/structures/ResumeStructure.aspx'
- adprefix = '/Specialisation/Sgdf/adherents/ResumeAdherent.aspx'
- irprefix = '/Specialisation/Sgdf/Rassemblements/' \
- 'InscriptionRassemblementV2.aspx'
- cpprefix = '/Specialisation/Sgdf/camps/ConsulterModifierCamp.aspx'
- laprefix = '/Specialisation/Sgdf/Commun/ResumeLieuActivite.aspx'
-
- parent = content.find(id = 'ctl00_Popup__evenements__gvEvenements')
- if not parent:
- return [_Pagination(0, False, 1)]
-
- elts = []
-
- # Récupération de la pagination.
- # `numpages` : numéro maximal de page dans la pagination.
- # `more`: y a-t-il plus de pages (la dernière page est-elle
- # en « ... » ?).
- # `curpage` : page actuelle selon la pagination.
-
- p = parent.find('tr', {'class': ['pagination']})
- if p != None:
- p = p.find('tr')
- td = p.find_all('td')[-1]
- button = next(td.children)
-
- if button.name == 'span':
- num = button.text.strip()
- more = False
- else:
- num = button['href']
- num = num[num.find("'Page$") + 6:]
- num = num[:num.find("'")]
- more = button.text == '...'
-
- numpages = int(num)
-
- for td in p.find_all('td'):
- child = next(td.children)
- if child.name == 'span':
- curpage = int(child.text)
- break
- else:
- curpage = 1
- numpages = 1
- more = False
-
- self._pgn.current = curpage
- self._pgn.number = numpages
- self._pgn.more = more
-
- # Récupération de la liste d'évènements.
-
- if not parent.find('tr', attrs = {'class': ['vide']}):
- for elt in parent.find_all('tr', recursive = False):
- try:
- assert 'entete' in elt['class']
- continue
- except:
- pass
- try:
- assert 'pagination' in elt['class']
- continue
- except:
- pass
-
- td = elt.find_all('td')
- ch = iter(td)
-
- edate = next(ch).text.strip()
- ename = next(ch).text.strip()
- etype = next(ch).text.strip()
- eobjs = next(ch)
- edesc = next(ch).text.strip()
-
- # Time decoding.
-
- d, t = edate.split()
- day, mon, year = map(int, d.split('/'))
- hour, min, sec = map(int, t.split(':'))
- dt = _datetime(year, mon, day, hour, min, sec)
-
- # Operation creation.
-
- op = _Operation()
- op.author.name = ename
- if not op.author.name:
- # XXX: Bogus event of type `Individu /`… for now we're
- # ignoring it.
-
- continue
-
- op.time = dt
- op.type = etype
- op.fields = edesc
-
- # Objects decoding.
-
- for link in eobjs.find_all('a'):
- name = link.text
- url = _urlparse(link['href'])
- if url.path == stprefix:
- st = _Structure()
- st.iid = _parse_qs(url.query)['id'][0]
- st.name = name
- op.related.add(st)
- elif url.path == adprefix:
- ad = _Adherent()
- ad.iid = _parse_qs(url.query)['id'][0]
- ad.name = name
- op.related.add(ad)
- elif url.path == irprefix:
- ir = _RallyRegistration()
- ir.iid = _parse_qs(url.query)['id'][0]
- ir.name = name
- op.related.add(ir)
- elif url.path == cpprefix:
- cp = _Camp()
- cp.iid = _parse_qs(url.query)['IdCamp'][0]
- cp.name = name
- op.related.add(cp)
- elif url.path == laprefix:
- la = _Place()
- la.iid = _parse_qs(url.query)['id'][0]
- la.name = name
- op.related.add(la)
-
- self.operations.add(op)
- elts.append(op)
-
- return elts
-
- def __feed_html_place(self, content):
- """ Decode the HTML place from a BeautifulSoup decoded
- content and feed it into the manager's places. """
-
- parent = content.find(id = 'ctl00__upMainContent')
- rp = 'ctl00_MainContent__resume__'
-
- place = _Place()
-
- # Informations générales: Libellé.
-
- lib = parent.find(id = f'{rp}lbLibelle')
- place.name = lib.text
-
- # Informations générales: Description.
-
- desc = parent.find(id = f'{rp}lbDescription')
- place.description = desc.text
-
- # Informations générales: Fiche. TODO
- # XXX: absence de champ… faut voir ce que ça donne quand c'est rempli ?
-
- # Coordonnées: Adresse (lignes).
- # Avec plusieurs lignes du type `lbLigne1`, `lbLigne2`, `lbLigne3`.
- # Certaines lignes peuvent ne pas être présentes, e.g. `lbLigne2`
- # peut manquer.
-
- def lines():
- for i in range(1, 4):
- try:
- aid = f'{rp}resumeAdresse__lbLigne{i}'
- al = parent.find(id = aid)
- assert al != None
- except:
- continue
- yield al.text.strip()
-
- place.address = '\n'.join(lines())
-
- # Coordonnées: Adresse (code postal).
-
- cp = parent.find(id = f'{rp}resumeAdresse__lbCodePostal')
- place.postal_code = cp.text
-
- # Coordonnées: Adresse (nom de la commune).
-
- vil = parent.find(id = f'{rp}resumeAdresse__lbVille')
- place.town = vil.text
-
- # Coordonnées: Adresse (pays).
-
- pays = parent.find(id = f'{rp}resumeAdresse__lbPays')
- place.country = pays.text
-
- # Coordonnées: département administratif.
- # Au format `XX - Nom du département` (où XX représente le numéro).
-
- dept = parent.find(id = f'{rp}lbDepartementAdministratif')
- place.department = dept.text
-
- # Coordonnées: étranger.
- # "Oui" ou "Non" selon si le lieu se trouve en France ou non… ?
-
- etr = parent.find(id = f'{rp}lbEtranger')
- place.out_of_france = (True, False)[etr.text == 'Non']
-
- # Coordonnées: continent.
- # Nom du continent, e.g. « Europe ».
-
- cont = parent.find(id = f'{rp}lbContinent')
- place.continent = cont.text
-
- # Coordonnées: numéro de téléphone.
- # Numéro de téléphone fixe associé au lieu.
-
- phone = parent.find(id = f'{rp}lbTelephone')
- place.phone = phone.text
-
- # Coordonnées: numéro de fax.
- # Numéro de fax associé au lieu.
-
- fax = parent.find(id = f'{rp}lbFax')
- place.fax = fax.text
-
- # Coordonnées: adresse de courriel.
- # Adresse de courriel associée au lieu.
-
- email = parent.find(id = f'{rp}lbCourriel')
- place.email = email.text
-
- # Informations complémentaires: numéro J&S.
- # XXX: ?? (vide sur les lieux explorés).
-
- numjs = parent.find(id = f'{rp}lbNumeroJS')
- place.js = numjs.text
-
- # Informations complémentaires: hébergement "dur".
- # "Oui" s'il y a un hébergement en "dur" sur le lieu, "Non" sinon.
-
- hebd = parent.find(id = f'{rp}lbHebergementDur')
- place.hardwall = (True, False)[hebd.text == 'Non']
-
- # Informations complémentaires: numéro de local.
- # XXX: ?? (vide sur les lieux explorés).
-
- numloc = parent.find(id = f'{rp}lbNumeroLocal')
- place.localnumber = numloc.text
-
- # Informations complémentaires: propriétaire.
- # Nom du propriétaire, e.g. « Bertrand DUPONT » (format libre).
-
- prop = parent.find(id = f'{rp}lbProprietaire')
- place.owner = prop.text
-
- # Informations complémentaires: adresse du propriétaire.
- # XXX: ?? (vide sur les lieux explorés).
-
- addrp = parent.find(id = f'{rp}lbAdresseProprietaire')
- place.owner_address = addrp.text
-
- # Accès: numéro de carte IGN.
- # XXX: ?? (vide sur les lieux explorés).
-
- ign = parent.find(id = f'{rp}lbNumeroCarteIGN')
- place.ign_num = ign.text
-
- # Accès: accès voiture.
- # "Oui" si le lieu est accessible en voiture, "Non" sinon.
-
- voit = parent.find(id = f'{rp}lbAccesVoiture')
- place.car_access = (True, False)[voit.text == 'Non']
-
- # Accès: distance de la gare la plus proche.
- # Au format libre (e.g. « 5km »).
-
- gard = parent.find(id = f'{rp}lbDistanceGare')
- place.closest_train_station_distance = gard.text
-
- # Accès: nom de la gare la plus proche.
- # Au format libre (e.g. « Saint julien du Sault »).
-
- garn = parent.find(id = f'{rp}lbNomGare')
- place.closest_train_station = garn.text
-
- # Accès: distance de l'arrêt de bus le plus proche.
- # Au format libre.
-
- busd = parent.find(id = f'{rp}lbDistanceArretBus')
- place.closest_bus_stop_distance = busd.text
-
- # Accès: nom de l'arrêt de bus le plus proche.
- # Au format libre.
-
- busn = parent.find(id = f'{rp}lbNomArretBus')
- place.closest_bus_stop = busn.text
-
- self.places.add(place)
- return place
-
- def __feed_html_structure_summary(self, content):
- """ Decode the HTML summary page for a structure from a
- BeautifulSoup decoded content and feed it into the
- manager's structures. """
-
- parent = content.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
- 'TabContainerResumeStructure__tabResume')
- rp = 'ctl00_ctl00_MainContent_TabsContent_' \
- 'TabContainerResumeStructure__tabResume__resume__'
-
- st = _Structure()
+ time = int(time)
- # Type de structure, parmi :
- # - « Unité 8-11 ans » : unité louveteaux-jeanettes.
+ kwargs = {}
- typ = parent.find(id = f"{rp}lblType")
- st.type = typ.text
-
- # Code de la structure.
-
- code = parent.find(id = f"{rp}lblCodeStructure")
- st.code = code.text
-
- # Nom de la structure.
-
- nom = parent.find(id = f"{rp}lblNom")
- st.name = nom.text
-
- # Description de la structure.
-
- desc = parent.find(id = f"{rp}lblDescription")
- st.description = desc.text
-
- # Statut de la structure. TODO
- # Par exemple, « Ouverte ».
-
- status = parent.find(id = f"{rp}lblStatut")
- st.status = status.text
-
- # Hémisphère de la structure. TODO
-
- hemis = parent.find(id = f"{rp}lblHemisphere")
- st.hemisphere = hemis.text
-
- # Adresse de la structure. TODO
-
- def lines():
- for i in range(1, 4):
- try:
- aid = f'{rp}resumeAdresse__lbLigne{i}'
- al = parent.find(id = aid)
- assert al != None
- except:
- continue
- yield al.text.strip()
-
- st.address = '\n'.join(lines())
-
- # Code postal.
-
- cp = parent.find(id = f"{rp}resumeAdresse__lbCodePostal")
- st.postal_code = cp.text.strip()
-
- # Nom de la commune.
-
- vil = parent.find(id = f"{rp}resumeAdresse__lbVille")
- st.town = vil.text.strip()
-
- # Pays de la commune.
-
- pays = parent.find(id = f"{rp}resumeAdresse__lbPays")
- st.country = pays.text.strip()
-
- # Département administratif.
-
- dept = parent.find(id = f"{rp}lblDepartement")
- st.department = dept.text.strip()
-
- # Numéro de téléphone.
-
- phone = parent.find(id = f"{rp}lblTelephone")
- st.phone = phone.text.strip()
-
- # Numéro de fax.
+ fp = open(_path.join(self.__folder, f"{time}{id:02d}.dump"), 'rb')
+ while True:
+ line = fp.readline().decode('UTF-8').rstrip('\r\n')
+ if not line:
+ break
- fax = parent.find(id = f"{rp}lblFax")
- st.fax = fax.text.strip()
+ kw, *value = line.split(':')
+ if not _validsym(kw) or kw == 'inp' or not value:
+ continue
+ value = ':'.join(value)
- # Adresse de courriel.
+ kwargs[kw] = value
- email = parent.find(id = f"{rp}lblCourrier")
- st.email = email.text.strip()
+ return self.decode(fp, **kwargs)
- # Site web.
+ def save_dump(self, inp, time = _datetime.now(), **kwargs):
+ """ Save a file. """
- website = parent.find(id = f"{rp}hlSiteWeb")
- try:
- st.website = website['href']
- except KeyError:
+ if isinstance(time, _datetime):
+ time = time.timestamp()
+ elif isinstance(time, _date):
+ time = _datetime.fromordinal(time.toordinal()).timestamp()
+ else:
+ time = int(time)
+
+ # The save process will be slightly different if we have a bytes
+ # or a text content.
+
+ mode = 'w'
+ if hasattr(inp, 'read'):
+ if _type(inp.read(0)) == bytes:
+ mode = 'wb'
+ elif _type(inp) == bytes:
+ mode = 'wb'
+ elif _type(inp) == str:
pass
+ else:
+ raise ValueError("Could not save this content")
- # Date de début d'activité pour la saison courante.
-
- deb = parent.find(id = f"{rp}lblDateDebutActivite")
- st.start = deb.text
-
- # Informations de localisation.
-
- loc = parent.find(id = f"{rp}lblInfosLocalisation")
- st.location_info = loc.text
-
- # Coordonnées GPS. TODO
-
- lat = parent.find(id = f"{rp}lblCoordonneesGPSLatitude")
- st.gps_latitude = lat.text
- lon = parent.find(id = f"{rp}lblCoordonneesGPSLongitude")
- st.gps_longitude = lon.text
-
- self.structures.add(st)
- return st
-
- def __feed_html_structure_hierarchy(self, content):
- """ Decode the HTML hierarchy page for a structure from a
- BeautifulSoup decoded content and feed it into the
- manager's structures. """
-
- parent = content.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
- 'TabContainerResumeStructure__tabHierarche')
-
- # Récupération de la structure parente. TODO
- # `pid`: identifiant de la structure parente.
- # `pcode`: code de la structure parente.
- # `pname`: nom de la structure parente.
-
- code = parent.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
- 'TabContainerResumeStructure__tabHierarche__gvParents_ctl02_' \
- '_hlStructure')
- pid = _parse_qs(_urlparse(code['href']).query)['id'][0]
- pid = _b64decode(pid)
- pcode = code.text
-
- p = parent.find(id = 'ctl00_ctl00_MainContent_TabsContent_' \
- 'TabContainerResumeStructure__tabHierarche__gvParents')
- pname = p.find_all('td')[1].text
-
- #self.structures.add(st)
- return None
-
- def __feed_html_personlist_fragment(self, content):
- """ Decode the HTML operations from a BeautifulSoup decoded
- content and feed it into the manager's operations. """
-
- people = []
-
- parent = content.find(id = 'ctl00_MainContent__recherche__gvResultats')
- for line in parent.children:
- try:
- _, name, adh, fct, st, finfct, cp, vil, \
- finadh, *_ = line.children
- except:
- continue
-
- p = Person()
-
- # Nom et ID interne.
+ # Open the file.
+ for idx in range(100):
try:
- url = next(name.children)['href']
- q = _parse_qs(_urlparse(url).query)
- iid = q['id'][0]
- except:
+ filename = f"{time}{idx:02d}.dump"
+ fd = _open(_path.join(self.__folder, filename),
+ _O_WRONLY | _O_CREAT | _O_EXCL)
+ except FileExistsError:
continue
- nom = next(name.children).text
- props = nom.split()
-
- p.iid = iid
- p.common_name = props[0]
- p.first_name = props[-1]
-
- # Code d'adhérent, fonction, structure associée,
- # période d'adhésion. TODO
- #
- # La date de fin de fonction est au format JJ/MM/AAAA.
- # La date de fin d'adhésion est au format JJ/MM/AAAA.
- # Le code adhérent est un nombre uniquement, e.g. « 110 ».
- #
- # Si la personne est une invitée, les champs de fin d'adhésion
- # ainsi que le code adhérent sont vides (mais pas le champ de
- # fin de fonction, qui fait référence à la date de fin du
- # statut d'invité).
- #
- # Il peut y avoir un code adhérent mais pas de date de fin
- # d'adhésion + fonction, ou simplement pas de date de fin de
- # fonction.
-
- try:
- code = next(adh.children).text
- fin_adhesion = next(finadh.children).text
- except:
- code = None
-
- fonction = fct.text
- try:
- finfonction = next(finfct.children).text
- except:
- finfonction = None
- structure = next(st.children).text
-
- # Commune de résidence.
-
- code_postal = next(cp.children).text
- ville = next(vil.children).text
-
- p.address.municipality.name = ville
- p.address.municipality.postal_code = code_postal
-
- # Ajout au tableau.
-
- people.append(p)
-
- # TODO: feed.
- return people
-
- def __feed_html_calendar_month_fragment(self, content):
- """ Decode the HTML page for a calendar month from a
- BeautifulSoup decoded content and feed it into the
- manager's structures. """
-
- parent = content.find(id = 'ctl00_MainContent__calendrier')
- events = []
-
- for jour in parent.find_all(attrs = {'class': ['jourDuMois']}):
- for div in jour.find_all('div'):
- try:
- e_id = div.find_all('a', recursive = False)[1]
- except:
- continue
- e_id = _parse_qs(_urlparse(e_id['href']).query)['id'][0]
-
- e = Event()
-
- span = div.find('span')['onmouseover']
- span = span[span.find("'") + 1:]
- spanidx = next(_rfinditer(r"($|[^\\])(\\{2})*\'", span)).end()
- span = span[:spanidx - 1]
- span = _rreplace(r"($|[^\\])\\*'", r"\1'", span)
- span = _htmlunescape(span)
-
- span = span.replace('<br />', '\n')
- span = span.replace('<hr />', '')
- span = span.replace('<u>', '')
- span = span.replace('</u>', '')
- span = _rsplit(r'</?b>', span)
-
- # Identifiant de l'intranet.
-
- e.iid = e_id
-
- # Libellé de l'évènement.
-
- e.name = span[1].strip()
-
- # Description de l'évènement.
- # Au format multi-lignes.
-
- desc = span[14].strip()
- if desc != "Indéterminée":
- e.desc = desc
-
- # Type de l'évènement. TODO
- # Parmi :
- # - "Sortie Journée"
- # - "Réunion Demi-Journée"
- # - "Week-End"
- # - "Réunion de travail"
- # - "Camp Année"
- # - "Camp Eté"
- # - "Extrajob"
-
- typ = span[4].strip()
-
- # Date de début de l'évènement.
- # Sous le format "JJ/MM/AAAA HH:MM".
-
- def dateheure(x):
- jma, hm = x.split()
- j, M, a = map(int, jma.split('/'))
- h, m = map(int, hm.split(':'))
-
- return DateHeure(a, M, j, h, m)
-
- du = span[6].strip()
- e.start = dateheure(du)
-
- # Date de fin de l'évènement.
- # Sous le format "JJ/MM/AAAA HH:MM".
-
- au = span[8].strip()
- e.end = dateheure(au)
-
- # Lieu de l'évènement.
- # En une ligne. "Indéterminé" s'il n'y en a pas.
-
- lieu = span[10].strip()
- if lieu != "Indéterminé":
- e.place.name = lieu
-
- # Structure principale ayant proposé l'évènement. TODO
- # Au format "CODE - NOM".
-
- prop = span[12].strip()
-
- # Dernier utilisateur ayant mis à jour l'évènement. TODO
- # Nom seulement.
-
- maj = span[16].strip()
-
- # Fonctions concernées. TODO
- # Valeurs possibles :
- # - "Toutes".
-
- fct = span[18].strip()
-
- # Date de rappel de l'évènement. TODO
- # Valeurs possibles :
- # - "Aucune"
- # - ?
-
- dr = span[20].strip()
-
- # Visibilité de l'évènement. TODO
- # Valeurs possibles :
- # - "Structure seule"
- # - "Structures dépendantes"
-
- vis = span[22].strip()
-
- # Date de dernier envoi des invitations. TODO
- # Valeurs possibles :
- # - "Aucun"
- # - ?
-
- inv = span[24].strip()
-
- # Ajout de l'évènement à la liste.
+ f = _fdopen(fd, mode)
+ break
+ else:
+ raise ValueError("Could not find a suitable index…")
- events.append(e)
+ # Write the headers.
- return events
+ for kw, value in kwargs.items():
+ print(f"{kw}: {value}", end = '\r\n', file = f)
+ print("", end = '\r\n', file = f)
- # ---
- # XLS documents decoding.
- # ---
+ # Then write the content.
- def __feed_xls_operations(self, content):
- """ Decode the XLS document for operations ("Journal_activites.xls")
- from a pandas decoded content and feed it into the
- manager's structures. """
+ if hasattr(inp, 'read'):
+ f.write(inp.read())
+ else:
+ f.write(inp)
- print(content)
- raise Exception
+ return time, idx
# ---
- # JSON documents decoding.
+ # Session management.
# ---
- def __feed_json_functions(self, content):
- """ Decode the JSON functions autocompletion data and return
- them without feeding the database. """
-
- def _sfix(code):
- if code[-1:] == "L" or code[:1] == "3":
- return "_L"
- if code[-1:] == "N" or code[:1] == "6":
- return "_N"
- if code[-1:] == "T" or code[:1] == "9":
- return "_T"
- return ""
-
- def _cname(cname):
- """ Make a canonical name out of a name. """
-
- cfilt = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
-
- cname = cname.casefold().upper()
- cname = cname.replace(' - ', ' ')
- cname = cname.replace(' !', '')
- cname = cname.replace(' ', ' ')
- cname = cname.replace("D'", '')
- cname = cname.replace(' ', '_')
- cname = cname.replace('É', 'E')
- cname = cname.replace('À', 'A')
- cname = ''.join(filter(lambda c: c in cfilt, cname))
-
- return cname
-
- # First phase: load from the JSON and store as an array under the
- # constant name.
-
- funcs = {}
-
- for idx, value in enumerate(content):
- iid = int(value['id'])
- name = value['name']
-
- code, rest = name.split('(')
- code = code.strip()
- rest = rest.split(')')[0]
-
- rest = rest.split('/')
- nm = rest[0].strip()
- if len(rest) == 1:
- nf = None
- else:
- nf = '/'.join(rest[1:])
- nf = nf.strip()
- if nf == nm:
- nf = None
-
- cnames = [_cname(nm)]
- if nf is not None and _cname(nf) != _cname(nm):
- cnames.append(_cname(nf))
-
- func = _FunctionRawData(iid, code, nm, nf, cnames, idx + 1)
-
- for cname in cnames:
- if not cname in funcs:
- funcs[cname] = []
- funcs[cname].append(func)
-
- # Second phase: disambiguation.
-
- while True:
- try:
- cname = next(cn for cn in funcs.keys() if len(funcs[cn]) > 1)
- except StopIteration:
- break
-
- disf = funcs[cname]
- del funcs[cname]
-
- # We ought to find a solution to make other canonical names.
-
- codes = [func.code for func in disf]
- sfix = [_sfix(code) for code in codes]
- if len(sfix) > len(set(sfix)):
- raise ValueError(f"Could not deduplicate using scale: {codes}")
-
- for func, suffix in zip(disf, sfix):
- # Delete all the current function references.
-
- for cname in func.cnames:
- try:
- f = funcs[cname]
- except KeyError:
- continue
-
- f.remove(func)
- if not f:
- del funcs[cname]
-
- # Replace all of the cnames.
-
- func = func.add_cname_suffix(suffix)
-
- # Add all of the references again.
-
- for cname in func.cnames:
- if not cname in funcs:
- funcs[cname] = []
- funcs[cname].append(func)
-
- # Third phase: flatten the functions and we're done!
-
- funcs = list(set(func[0] for func in funcs.values()))
- funcs.sort(key = lambda x: x.srcorder)
+ def login(self, user, pw):
+ """ Add credentials to the mix. """
- return funcs
+ raise NotImplementedError
# ---
# Export dynamically gathered data such as functions as Python files