aboutsummaryrefslogtreecommitdiff
path: root/tools/Internals/copyright.py
blob: 232f86596926f02a84dc0c6293c357fab913fcde (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env python3
""" Get the copyright rules from a root, and get the copyright rules on a file.
	Based on the `copyright.yml` files -- see `FORMAT.md` for its description.
"""

import os as _os
import re as _re
import fnmatch as _fnmatch
import datetime as _datetime
import yaml as _yaml

from .exceptions import *

# Public names exported by a star-import of this module.
__all__ = ["get_copyright_rules", "get_copyright", "merge_copyrights"]

# Fallback author entry used by `get_copyright_rules()` when a rule does not
# list any copyright holder, as ((first year, last year), name, mail).
# The last year is the current year, evaluated once at import time.
__default_maintainer = ((2016, _datetime.date.today().year),
	'Thomas "Cakeisalie5" Touhey', 'thomas@touhey.fr')

# ---
# Gather the copyright rules from the filesystem.
# ---

def __gitignore_rules(rt):
	""" Read the `.gitignore` of directory `rt` and return the list of
		path patterns it ignores (empty if there is no such file). """

	try:
		with open(_os.path.join(rt, '.gitignore')) as f:
			lines = list(f)
	except FileNotFoundError:
		return []

	patterns = []
	for line in lines:
		# Get what's before the comment. A `#` starts a comment when it is
		# at the beginning of the line or preceded by an even number of
		# backslashes (i.e. when it is not escaped).
		line = _re.sub(r'(^|[^\\])((?:\\\\)*)#.*$', r'\1\2', line)

		# Drop the first escaping backslash (e.g. `\#name` -> `#name`).
		line = _re.sub(r'\\(.*)', r'\1', line)
		line = line.strip()
		if not line:
			continue

		# A leading slash anchors the pattern to this directory; a trailing
		# slash only marks a directory and would prevent any match, since
		# paths are compared without a trailing separator.
		anchored = line[0] == '/'
		trimmed = (line[1:] if anchored else line).rstrip('/')
		if not trimmed:
			continue

		parts = trimmed.split('/')
		patterns.append(_os.path.join(rt, *parts))
		if not anchored:
			# Unanchored patterns also apply in any subdirectory.
			patterns.append(_os.path.join(rt, '**', *parts))

	return patterns

def __copyright_rules(rt):
	""" Read the `copyright.yml` of directory `rt` and return a dictionary
		mapping path patterns to `{'authors': ..., 'license': ...}`
		(empty if there is no such file). """

	try:
		with open(_os.path.join(rt, 'copyright.yml')) as f:
			content = f.read()
	except FileNotFoundError:
		return {}

	found = {}

	# `safe_load_all` only builds plain Python objects, which is all the
	# format needs; `load_all` without an explicit loader is unsafe and
	# rejected by recent PyYAML versions.
	for rule in _yaml.safe_load_all(content):
		# Empty documents (e.g. a trailing `---`) are loaded as None.
		if not rule:
			continue

		files = rule['files']
		if not isinstance(files, list):
			files = [files]

		license_name = rule.get('license', 'LGPL-3')

		# Authors are either (year, name, mail) or (start, end, name, mail).
		authors = []
		for author in rule.get('copyright') or []:
			if len(author) == 3:
				authors.append(((author[0], author[0]),
					author[1], author[2]))
			else:
				authors.append(((author[0], author[1]),
					author[2], author[3]))
		if not authors:
			authors.append(__default_maintainer)

		for pattern in files:
			key = _os.path.join(rt, *pattern.split('/'))
			found[key] = {'authors': authors, 'license': license_name}

	return found

def get_copyright_rules(root):
	""" Get the copyright rules from a root.

		Walk `root` recursively and, for each directory, gather the
		patterns ignored by its `.gitignore` and the rules described in
		its `copyright.yml`.

		Return a dictionary whose keys are path patterns (prefixed with the
		directory they were found in) and whose values are either the
		'===IGNORED===' marker or `{'authors': [...], 'license': ...}`,
		where each author is ((start year, end year), name, mail).
		Within a directory, a copyright rule replaces an ignore rule
		having the same pattern. """

	# Make sure the root ends with a path separator.
	root = _os.path.join(root, '')
	rules = {}

	for rt, _, _ in _os.walk(root):
		for pattern in __gitignore_rules(rt):
			rules[pattern] = '===IGNORED==='
		rules.update(__copyright_rules(rt))

	return rules

# ---
# Get a file's copyright from the copyright rules obtained before.
# ---

def __match(rule, path):
	""" Check if a path matches a rule.

		`rule` is an `fnmatch`-style pattern. It matches if it matches the
		path itself or any of its parent directories, so that a rule naming
		a directory covers everything below it.
		Return True if the rule matches, False otherwise. """

	# Check if the full path is matched.
	if _fnmatch.fnmatch(path, rule):
		return True

	# Check if one of the parent directories is matched.
	while True:
		dirname = _os.path.dirname(path)

		# Stop when there is no parent left: the directory name is empty
		# at the top of a relative path, and equal to the path itself once
		# the filesystem root is reached (without this second check,
		# absolute paths would loop forever on the root).
		if not dirname or dirname == path:
			break
		if _fnmatch.fnmatch(dirname, rule):
			return True
		path = dirname

	# Nothing is matched. Exit!
	return False

def get_copyright(rules, path):
	""" Find the copyright applying to `path` among `rules`.

		`rules` is the dictionary built by `get_copyright_rules()`.
		Return an `(authors, license)` tuple.

		Raise `NotAFileException` if `path` is not an existing file,
		`IgnoredFileException` if it is a `copyright.yml` file or if the
		selected rule marks it as ignored, and `NoCopyrightException` if
		no rule applies. """

	if not _os.path.isfile(path):
		raise NotAFileException
	if _os.path.basename(path) == 'copyright.yml':
		raise IgnoredFileException

	# Keep every rule applying to the path, in the order of the rules.
	candidates = [pattern for pattern in rules if __match(pattern, path)]

	# The longest pattern is considered the most precise one; when several
	# share the maximum length, the first one is kept.
	# TODO: this length heuristic is a quick hack: it does not see that
	# "arch/**/info.yml" is more precise than "arch/myplatform/mymodule/*".
	best = max(candidates, key=len, default=None)
	if not best:
		raise NoCopyrightException

	entry = rules[best]
	if entry == '===IGNORED===':
		raise IgnoredFileException
	return (entry['authors'], entry['license'])

# ---
# Merge file copyrights.
# ---

def __merge_copyright_dates(dates, dont_be_precise=True):
	""" Merge copyright dates.

		`dates` is an iterable of (start year, end year) ranges; the result
		is the list of merged ranges, sorted by start year (empty if
		`dates` is empty). Overlapping ranges, and ranges contained in
		another one, are always merged.

		If `dont_be_precise` is False, [(2013, 2014), (2015, 2016)] will
		be left as is (otherwise, is turned as [(2013, 2016)]). """

	dates = sorted(dates, key=lambda x: x[0])
	if not dates:
		return []

	# Number of years two ranges may be apart while still being merged.
	# It is only used for the comparison: the stored years are never
	# altered by it.
	gap = 1 if dont_be_precise else 0

	first_start, first_end = dates[0]
	merged = [(first_start, first_end)]
	for start, end in dates[1:]:
		last_start, last_end = merged[-1]

		if start - gap <= last_end:
			# Extend the last range; `max` keeps its end when the current
			# range is entirely contained in it.
			merged[-1] = (last_start, max(last_end, end))
		else:
			merged.append((start, end))
	return merged

def __unpack_copyright(entry):
	""" Return the `(authors, license)` pair of a copyright given either as
		a `{'authors': ..., 'license': ...}` dictionary or as an
		`(authors, license)` tuple. """

	if isinstance(entry, dict):
		return entry['authors'], entry['license']
	authors, license_name = entry
	return authors, license_name

def merge_copyrights(first, second, *others):
	""" Merge copyrights.

		Each argument is either an `(authors, license)` tuple as returned
		by `get_copyright()`, or a `{'authors': ..., 'license': ...}`
		dictionary as returned by this function, where each author is
		((start year, end year), name, mail).

		Return a `{'authors': ..., 'license': ...}` dictionary where the
		date ranges of each (name, mail) author have been merged.

		Raise `IncompatibleLicensesException` if the licenses differ. """

	# Check if there is more than two arguments.
	if others:
		merged = merge_copyrights(first, second)
		for other in others:
			merged = merge_copyrights(merged, other)
		return merged

	f_authors, f_license = __unpack_copyright(first)
	s_authors, s_license = __unpack_copyright(second)

	# Check that the licenses are compatible.
	# TODO: for now, it's stupid.
	if f_license != s_license:
		raise IncompatibleLicensesException(f_license, s_license)

	# Get the authors and all of their copyright dates.
	dates_by_author = {}
	for (start, end), name, mail in list(f_authors) + list(s_authors):
		dates_by_author.setdefault((name, mail), []).append((start, end))

	# Merge the dates of each author, and make one entry per merged range.
	copyrights = []
	for (name, mail), dates in dates_by_author.items():
		for start, end in __merge_copyright_dates(dates):
			copyrights.append(((start, end), name, mail))

	return {'authors': copyrights, 'license': f_license}

# End of file.