diff options
author | Thomas "Cakeisalie5" Touhey <thomas@touhey.fr> | 2017-06-24 15:53:03 +0200 |
---|---|---|
committer | Thomas "Cakeisalie5" Touhey <thomas@touhey.fr> | 2017-06-24 15:53:03 +0200 |
commit | 67eb47b5f3a5ba41871484d08d1094d3fd481d6e (patch) | |
tree | 2d736ea7039987b2a6de9f4befb03458782c4c92 | |
parent | 09ca2e24d7206acd3b817fb688a1bbd3ca2898cc (diff) |
Starting making the binary format and reorganizing the project.
-rw-r--r-- | AUTHORS.md | 1 | ||||
-rw-r--r-- | CONTRIBUTING.md | 26 | ||||
-rw-r--r-- | FORMAT.md | 240 | ||||
-rw-r--r-- | Makefile | 31 | ||||
-rw-r--r-- | README.md | 180 | ||||
-rw-r--r-- | reference/categories.yml (renamed from categories.yml) | 0 | ||||
-rw-r--r-- | reference/characters.yml (renamed from characters.yml) | 0 | ||||
-rw-r--r-- | reference/fonts.yml (renamed from fonts.yml) | 0 | ||||
-rw-r--r-- | reference/mini/0x7FXX.pbm (renamed from mini/0x7FXX.pbm) | bin | 848 -> 848 bytes | |||
-rw-r--r-- | reference/mini/0xE5XX.pbm (renamed from mini/0xE5XX.pbm) | bin | 848 -> 848 bytes | |||
-rw-r--r-- | reference/mini/0xE6XX.pbm (renamed from mini/0xE6XX.pbm) | bin | 848 -> 848 bytes | |||
-rw-r--r-- | reference/mini/0xXX.pbm (renamed from mini/0xXX.pbm) | bin | 848 -> 848 bytes | |||
-rw-r--r-- | reference/normal/0x7FXX.pbm (renamed from normal/0x7FXX.pbm) | bin | 1169 -> 1169 bytes | |||
-rw-r--r-- | reference/normal/0xE5XX.pbm (renamed from normal/0xE5XX.pbm) | bin | 1169 -> 1169 bytes | |||
-rw-r--r-- | reference/normal/0xE6XX.pbm (renamed from normal/0xE6XX.pbm) | bin | 1169 -> 1169 bytes | |||
-rw-r--r-- | reference/normal/0xE7XX.pbm (renamed from normal/0xE7XX.pbm) | bin | 1169 -> 1169 bytes | |||
-rw-r--r-- | reference/normal/0xXX.pbm (renamed from normal/0xXX.pbm) | bin | 1169 -> 1169 bytes | |||
-rw-r--r-- | reference/sets.yml (renamed from sets.yml) | 0 | ||||
-rw-r--r-- | sets/C.Basic.set | bin | 0 -> 21532 bytes | |||
-rw-r--r-- | sets/CASIOWIN_1.00.set | bin | 0 -> 15408 bytes | |||
-rw-r--r-- | sets/CASIOWIN_1.05.set | bin | 0 -> 15427 bytes | |||
-rw-r--r-- | sets/CASIOWIN_2.00.set | bin | 0 -> 21036 bytes | |||
-rw-r--r-- | sets/CASIOWIN_2.04.set | bin | 0 -> 21157 bytes | |||
-rw-r--r-- | tools/__pycache__/fontcharacter.cpython-36.pyc | bin | 0 -> 5134 bytes | |||
-rwxr-xr-x | tools/fontcharacter.py | 176 | ||||
-rwxr-xr-x | tools/listsets.py | 21 | ||||
-rwxr-xr-x | tools/makebin.py | 109 |
27 files changed, 600 insertions, 184 deletions
@@ -8,5 +8,6 @@ Thanks to the other contributors for making FONTCHARACTER great again: Thanks to the following sources of information: * Raw OS Data, extracted using a tool based on a technique found by Simon Lothar; +* "La Casio Graph100", by Olivier Coupelon (May, 2002); * Casetta's CASIO Token List, by Florian Birée (May, 2007); * Various manuals from CASIO. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9946c6f..9ab7d25 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,14 +1,14 @@ # Contributing to FONTCHARACTER Reference -Start by reading all of the `README.md`, which describes the used formats. -This is for the people using the project, but it is also for you, fellow -contributor! Also, I hope you know YAML a little; or, you can still learn -on the job! :) +Start by reading all of the available documentation, from the `README.md` +to the formats description, `FORMAT.md` -- in order to know how to contribute +to the project, you have to know how to use it! ## What is left to do -The main thing that is left to do is define the sets, and check which characters -appeared in each of the sets. The FONTCHARACTERS extensions do **not** have -priority (to be honest, the addition of C.Basic was not really planned -straight away...), so please try to concentrate on CASIO sets first. +The main thing that is left to do is define the sets, and check which +characters appeared in each of the sets. +The FONTCHARACTER extensions do **not** have priority +(to be honest, the addition of C.Basic was not really planned +straight away...), so please try to concentrate on CASIO's sets first. To achieve this, you can use my [Opcode Table C Extracting Tool][extract], and the OSes on [Planète Casio's Bible][oses]. @@ -18,10 +18,12 @@ You have tried using the reference, but it doesn't match the characters on your calculator? Yes, it indeed could be a mistake, but it might as well be a compatibility issue! -You should check in the sets if your model/OS is supported. 
If you could try to -correct the reference while not breaking everything (even though the maintainers -probably won't allow that), that would be great! Otherwise, you can try to -contact the current project maintainer. +You should check in the sets if your model/OS is supported. If you could try +to correct the reference while not breaking everything (even though the +maintainers will probably be careful), that would be great! + +Otherwise, you can try to contact the current project maintainer(s). +You can find their details in `AUTHORS.md`. [extract]: http://www.casiopeia.net/forum/viewtopic.php?p=14742#p14742 [oses]: http://bible.planet-casio.com/casio/os_boot_setup/ diff --git a/FORMAT.md b/FORMAT.md new file mode 100644 index 0000000..7213ba5 --- /dev/null +++ b/FORMAT.md @@ -0,0 +1,240 @@ +# FONTCHARACTER reference formats +To know more about the project itself, and what these formats are, +see `README.md`. + +To use the source format, you'll have to include the current repository, +probably as a git submodule, and read it directly. It is highly unrecommended: +the source format is complex to read (but easy to manage for a human), +and can evolve (with the included tools). + +To use the binary format, the installed generated files will probably be in +a common folder such as `/usr/share/casio/fontcharacter/*.set` on a +GNU/Linux distribution or OS X, or +`C:\Program Files\CASIO\FONTCHARACTER\*.set` under Microsoft Windows +(yet to be confirmed). + +## Source format +YAML has been chosen to store the information, as it's a storage format that +a machine and a human can read and write quite easily. + +### Sets +A set is basically a pack of characters appeared at the same time on CASIO +calculators, or in an extension (alternative CASIO Basic +interpreters/compilers). + +`sets.yml` is the sets file. 
For each set: + +- the `description` field is the description of the set; +- if the `default` field is there, then it is the default set to use + (generally the most recent set made by CASIO); +- if the `leading` field is there, the list of leading characters is in it, + separated by commas; +- if the `parent` field is there, then the set inherits all of the characters + of its parents, and, if the child has no `leading` field, its parent's + leading characters. + +### Categories +`categories.yml` is the categories file. Each category has an `id` field, which +is the identification string, an optional `prefix` field and an optional `sub` +list, which is the subcategories with each an `id` and a `prefix` fields. +To access the subcategory "Latin Capital" in the category "Letter", the +`category` field in the character information will have to be +`Letter/Latin Capital/Mini`. The name of the character will then be prefixed by +`Mini Latin Capital Letter ` (with the spaces between prefixes and an ending +space); the subcategory prefix goes first. If there is a suffix, a space then +it are appended to the character name, for example, ` Digit`. + +There are some more fields -- see the _Embedded CASIO BASIC documentation_ +section. + +### Characters +There are two systems of characters on CASIO calculators: Simon Lothar calls +them the "characters" and the "opcodes". The "characters" are simple characters +with a display, and the "opcodes", which are defined by a set of characters +(e.g. "Locate "). The two are described in two different tables on the +calculator, but the two describe the same encoding, so that's why this reference +considers all "characters" and "opcodes" as characters ("opcodes" are here +called multi-characters). + +`characters.yml` is the file containing data about the characters. 
For each +character, the `code` field is its `FONTCHARACTER` code, the `name` field is +the complete description of the character, the `flags` are the character flags +and the `category` field is the category(/subcategory) ID (see in the last +paragraph). If there is no category field, the category is "Other", with no +prefix. + +Flags is a list of flag strings. Current flags are: +* `nl`: the character should be followed by a newline; +* `esc`: the character's CTF token is escaped with a reverse solidus; +* `sep`: the character is a Basic separator; +* `base`: only accessible in BASE programs. + +Some characters have an ASCII token representation, mostly for the *cat*, +*newcat*, *ctf* and *casemul* formats. If the `tokens` field exists, then +it is a dictionary of the tokens in the different formats. +- If the `cat` field of the dictionary doesn't exist, its value is deduced + recursively from the `multi` field if it is there, or from the `unicode` field + (if all-`ASCII`), and prefixed by a reverse solidus '\\'; +- If the `newcat` field of the dictionary doesn't exist, it takes its + value from the `cat` field; +- If the `ctf` field of the dictionary doesn't exist, it takes its value from + the `cat` field if it was not deduced, otherwise, it is deduced the same way + as the `cat` field, but it is not prefixed with a reverse solidus '\\'; +- If the `casemul` field of the dictionary doesn't exist, it is deduced the + same way as the `ctf` field; +- If the `ref` field of the dictionary doesn't exist, it takes the + (first) value of the `ctf` field. + +There can be multiple tokens for one format; in this case, the value of the +format field is a list. + +It is possible to obtain an ASCII/HTML representation of most characters: +- If tokens exist, take the `ref` token; +- Otherwise, if the `multi` field is specified, then the representation can be + obtained recursively by querying this field's elements; +- Otherwise, no ASCII representation is available. 
+ +The `id` field is an identifier for the character, composed of letters, +numbers and underscores. It can be used for C defines. +If there is no `id` field, it is the value in the `ascii` field if it can +be deduced (or the `name` field if it can't), with hyphens turned into +underscores, and other non-valid characters removed (spaces, parenthesis, ...). + +You have to distinguish multi-characters opcodes and simple opcodes. +Multi-character opcodes are characters that simply are a sequence of simple +characters. You can distinguish them from simple opcodes by checking the +presence of a `multi` field, which then is the `FONTCHARACTER` codes of the +characters in the sequence, separated with commas. + +Multi-characters are distinguishable from simple characters by checking the +presence of a `multi` field. The `multi` field is the `FONTCHARACTER` codes of +the characters composing it, separated by commas. Be careful: there can be +only one character for the multi-character, and Yaml won't interpret this as +a string, but as a number directly! + +If the character is simple, then if there is a unicode sequence equivalent of +the character, the Unicode codes of the sequences separated with commas will be +in the `unicode` field; otherwise, the field doesn't exist. + +If the character data has a `set` field, then the character is in a set; +otherwise, it should be considered as part of the default set. + +#### Embedded CASIO BASIC documentation +Some characters will have the `type` field. This type means they have a special +meaning in CASIO Basic. There are two types: `function` and `object`. There is +an associated syntax, which is either `<name>(arg1, arg2)` or +`<name> arg1,arg2`, the first syntax is when `par` is `true` and the second one +is when it is `false`. +Note that for the first syntax, the ending parenthesis is not mandatory. 
+ +If `par` is `false` (or non-existent), then the `fix` field can be +set to `infix`, which means the function will be used with either +`arg1 <name>` or `arg1 <name> arg2`. + +If the function/object should receive arguments, it can be documented using the +`args` field, and if it has, after these arguments, optional arguments, it can +be documented with the `optn` field. These fields receive a list of argument +strings. An argument type can be imposed by adding `:<code>` at the end of the +argument string; for example, here are the `For` and `To` entries: + + - + code: 0xF704 + name: For + category: Statement + args: ["to:0xF705"] + action: ... + multi: [0x46, 0x6F, 0x72, 0x20] + - + code: 0xF705 + name: To + category: Operator + args: ["assign:0x0E"] + optn: ["step:0xF706"] + action: ... + multi: [0x20, 0x54, 0x6F, 0x20] + +If the function is supposed to perform an action, this action can be documented +using the `action` field. If it is supposed to return something, it can +be documented using the `return` field. + +### Fonts +`fonts.yml` is the file containing the fonts information. For each font, +`id` is the ID string, `name` is the complete name, `author` is the complete +author name, `width` and `height` are the dimensions of each character in +the font. + +For each font, there is a corresponding folder, named with the font ID. +This folder contains the character images, organized by the leading multi-byte +character; if there is none, the file `0xXX.pbm` will be chosen, otherwise, +the file `0xLLXX.pbm` will be chosen, where `0xLL` is the leading character. +If the file doesn't exist, the character is to be considered as blank. + +Each existing file is a set of 256 tiles of `width * height` each. Each row is +the tiles going from `0xR0` to `0xRF`, where `0xR` is the row number +(0x0 to 0xF). 
+ +## Binary format +The binary format is divided into four zones: + +- the overall header; +- the leading character pool; +- the character pool; +- the data pool. + +Multi-byte fields are encoded as **big endian**. + +### Overall header +The file starts with an overall header, describing the structure of the rest +of the file. It starts with the following: + +- Magic string (8 bytes): "CASIOFC\x7F"; +- Version byte (1 byte): 0x01 + +If the magic string does not match, the file is either corrupted or of +another format. If the version byte doesn't match, then the file uses a different +version from the current one, and you should report that the user needs an +upgraded version of your utility (because you'll keep updating it... right?), +or a more recent utility. + +The overall header continues with the following fields: + +- Number of majors (1 byte): this is the number of entries in the leading + characters pool (second zone of the file); +- Number of characters (2 bytes): the total number of characters; +- Checksum (4 bytes): basic checksum for the leading character pool, + character pool, and data pool (if zero, do not check the checksum); +- File size (4 bytes): the file size; +- Data zone size (4 bytes): the data zone size. + +The checksumming technique is simple: you add all of the data, byte by byte, +in a 32-bit variable. For example, the checksum of \[0xFF, 0x02, 0x03\] is +0x00000104. Overflow is allowed (0xFFFFFFFF + 2 = 0x00000001). + +### Leading character pool +This pool provides quick access to the characters under a leading-character. +Each entry is made of the following: + +- Leading character byte (1 byte), e.g. 0x00 or 0xE5; +- Reserved (1 byte), always zero; +- Starting entry ID in the character pool (2 bytes); + +The starting entry ID is to be multiplied by the size of a character entry (which is +constant) to obtain the byte offset within the character pool. + +### Character pool +This pool provides the character entries. 
For quick access, each entry is +the same size, the variable data being stored in the data pool. +Each entry has the following format: + +- Leading character (1 byte), e.g. 0x00 or 0xE5; +- Main character (1 byte), e.g. 0x45 for 0xE545; +- Unicode string size (1 byte), 0 if no unicode string or FONTCHARACTER seq.; +- FONTCHARACTER sequence size (1 byte), 0 if not a FONTCHARACTER sequence; +- Unicode string offset in pool data (4 bytes); +- Sequence offset in pool data (4 bytes); + +### Data pool +Raw data is stored here. To get the size of this zone, take the file size +in the overall header and remove the size of the three previous zones. +This size is duplicated in the header, and the correlation between the +calculated and given sizes should be checked. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..eb89b3f --- /dev/null +++ b/Makefile @@ -0,0 +1,31 @@ +#!/usr/bin/make -f +#*****************************************************************************# +# Variables # +#*****************************************************************************# +# Directories. + REFDIR := ./reference + SETDIR := ./sets + +# Reference files, set files. + REF := $(wildcard $(REFDIR)/*.yml) + SETS := $(patsubst %,$(SETDIR)/%.set,\ + $(shell tools/listsets.py --refpath $(REFDIR))) +#*****************************************************************************# +# Targets # +#*****************************************************************************# +# Make all of the sets. +all: $(SETS) + $(SETS): $(REF) + tools/makebin.py --output $(SETDIR) --refpath $(REFDIR) + +# Clean all of the sets +clean: + rm -rf $(SETDIR) + +# Install all of the sets. +install: all + /usr/bin/install -m 755 -d "/usr/share/casio-fontcharacter" + /usr/bin/install $(SETS) "/usr/share/casio-fontcharacter" + +.PHONY: all clean install +# End of file. @@ -4,11 +4,11 @@ It is an encoding made up for CASIO calculators. It is partially ASCII retrocompatible. 
-Each character in the fixed-width form is two bytes long. The multi-byte -encoding is simple: if it is one of the leading characters of the set, -then the character is two bytes long, otherwise, it is one bytes long -(e.g. if 0xE5 is a leading character, 0xE5 followed by 0x46 forms the 0xE546 -character, and 0x46 is a character alone and forms the 0x0046 character). +It is a simple multi-byte encoding, where some characters are multi-byte +sequence leaders. Each character can occupy up to two bytes (leading character, +then whatever -- a leading character after another one isn't). +For example, if 0xE5 is a leading character, 0xE5 followed by 0xE5 forms +the 0xE5E5 character, and if 0x46 isn't, it forms the 0x46 character. This project is here to centralize all the info about it : existing characters, their appearance in the standard fonts, their Unicode equivalents and defines. @@ -16,172 +16,8 @@ The goal of this project is to give the ability to any project (thanks to [LGPL3][lgpl3]) to be able to read, write, describe and convert from or to CASIO's proprietary encoding. -You can use it directly or with the -[libfontcharacter](https://github.com/cakeisalie5/libfontcharacter) library. -If you really want to use it directly, the _Project organization_ section -describes the data organization. - -## Contributing -See `CONTRIBUTING.md`. Thanks in advance! - -## Project organization -YAML has been chosen to store the information, as it's a storage format that -a machine and a human can read and write quite easily. - -### Sets -A set is basically a pack of characters appeared at the same time on CASIO -calculators, or in an extension (alternative CASIO Basic -interpreters/compilers). - -`sets.yml` is the sets file. 
For each set: - -- the `description` field is the description of the set; -- if the `default` field is there, then it is the default set to use - (generally the most recent set made by CASIO); -- if the `leading` field is there, the list of leading characters is in it, - separated by commas; -- if the `parent` field is there, then the set inherits all of the characters of - its parents, and, if the child has no `leading` field, its parent's leading - characters. - -### Categories -`categories.yml` is the categories file. Each category has an `id` field, which -is the identification string, an optional `prefix` field and an optional `sub` -list, which is the subcategories with each an `id` and a `prefix` fields. -To access the subcategory "Latin Capital" in the category "Letter", the -`category` field in the character information will have to be -`Letter/Latin Capital/Mini`. The name of the character will then be prefixed by -`Mini Latin Capital Letter ` (with the spaces between prefixes and an ending -space); the subcategory prefix goes first. If there is a suffix, a space then it -are appended to the character name, for example, ` Digit`. - -There are some more fields -- see the _Embedded CASIO BASIC documentation_ -section. - -### Characters -There are two systems of characters on CASIO calculators: Simon Lothar calls -them the "characters" and the "opcodes". The "characters" are simple characters -with a display, and the "opcodes", which are defined by a set of characters -(e.g. "Locate "). The two are described in two different tables on the -calculator, but the two describe the same encoding, so that's why this reference -considers all "characters" and "opcodes" as characters ("opcodes" are here -called multi-characters). - -`characters.yml` is the file containing data about the characters. 
For each -character, the `code` field is its `FONTCHARACTER` code, the `name` field is -the complete description of the character, the `flags` are the character flags -and the `category` field is the category(/subcategory) ID (see in the last -paragraph). If there is no category field, the category is "Other", with no -prefix. - -Flags is a list of flag strings. Current flags are: -* `nl`: the character should be followed by a newline; -* `esc`: the character's CTF token is escaped with a reverse solidus; -* `sep`: the character is a Basic separator; -* `base`: only accessible in BASE programs. - -Some characters have an ASCII token representation, mostly for the *cat*, -*newcat*, *ctf* and *casemul* formats. If the `tokens` field exists, then -it is a dictionary of the tokens in the different formats. -- If the `cat` field of the dictionary doesn't exist, its value is deduced - recursively using the `multi` field is there, or from the `unicode` field - (if all-`ASCII`), and prefixed by a reverse solidus '\\'; -- If the `newcat` field of the dictionary doesn't exist, it takes its - value from the `cat` field; -- If the `ctf` field of the dictionary doesn't exist, it takes its value from - the `cat` field if it was not deduced, otherwise, it is deduced the same way - as the `cat` field, but it is not prefixed with a reverse solidus '\\'; -- If the `casemul` field of the dictionary doesn't exist, it is deduced the - same way than the `ctf` field; -- If the `ref` field of the dictionary doesn't exist, it takes the - (first) value of the `ctf` field. - -There can be multiple tokens for one format; in this case, the value of the -format field is a list. - -It is possible to obtain an ASCII/HTML representation of most characters: -- If tokens exist, take the `ref` token; -- Otherwise, if the `multi` field is specified, then the representation can be - obtained recursively by querying this field's elements; -- Otherwise, no ASCII representation is available. 
- -The `id` field is an identifier for the character, composed of letters, -numbers and underscores. It can be used for C defines. -If there is no `id` field, it is the value in the `ascii` field if it can -be deduced (or the `name` field if it can't), with hyphens turned into -underscores, and other non-valid characters removed (spaces, parenthesis, ...). - -You have to distinguish multi-characters opcodes and simple opcodes. -Multi-character opcodes are characters that simply are a sequence of simple -characters. You can distinguish them from simple opcodes by checking the -presence of a `multi` field, which then is the `FONTCHARACTER` codes of the -characters in the sequence, separated with commas. - -Multi-characters are distinguishable from simple characters by checking the -presence of a `multi` field. The `multi` field is the `FONTCHARACTER` codes of -the characters composing it, separated by commas. Be careful: there can be -only one character for the multi-character, and Yaml won't interpret this as -a string, but as a number directly! - -If the character is simple, then if there is a unicode sequence equivalent of -the character, the Unicode codes of the sequences separated with commas will be -in the `unicode` field; otherwise, the field doesn't exist. - -If the character data has a `set` field, then the character is in a set; -otherwise, it should be considered as part of the default set. - -#### Embedded CASIO BASIC documentation -Some characters will have the `type` field. This type means they have a special -meaning in CASIO Basic. There are two types: `function` and `object`. There is -an associated syntax, which is either `<name>(arg1, arg2)` or -`<name> arg1,arg2`, the first syntax is when `par` is `true` and the second one -is when it is `false`. -Note that for the first syntax, the ending parenthesis is not mandatory. 
- -If `par` is `false` (or non-existent), then the `fix` field can be -set to `infix`, which means the function will be used with either -`arg1 <name>` or `arg1 <name> arg2`. - -If the function/object should receive arguments, it can be documented using the -`args` field, and if it has, after these arguments, optional arguments, it can -be documented with the `optn` field. These fields receives a list of argument -strings. An argument type can be imposed by add-in `:<code>` at the end of the -argument string; for example, here are the `For` and `To` entries: - - - - code: 0xF704 - name: For - category: Statement - args: ["to:0xF705"] - action: ... - multi: [0x46, 0x6F, 0x72, 0x20] - - - code: 0xF705 - name: To - category: Operator - args: ["assign:0x0E"] - optn: ["step:0xF706"] - action: ... - multi: [0x20, 0x54, 0x6F, 0x20] - -If the function is supposed to make an action, this action can be documented -using the `action` field. If it is supposed to return something, it should can -be documented using the `return` field. - -### Fonts -`fonts.yml` is the file containing the fonts information. For each font, -`id` is the ID string, `name` is the complete name, `author` is the complete -author name, `width` and `height` are the dimensions of each character in -the font. - -For each font, there is a corresponding folder, named with the font ID. -This folder contains the characters images, organized by the leading multi-byte -character; if there is none, the file `0xXX.pbm` will be chosen, otherwise, -the file `0xLLXX.pbm` will be chosen, where `0xLL` is the leading character. -If the file doesn't exist, the character is to be considered as blank. - -Each existing file is a set of 256 tiles of `width * height` each. Each row is -the tiles going from `0xR0` to `0xRF`, where `0xR` is the row number -(0x0 to 0xF). +You can use this reference through its source format, through its binary +format (better), or through [libfontcharacter][libfc] (recommended!). 
+[libfc]: https://github.com/PlaneteCasio/libfontcharacter [lgpl3]: https://www.gnu.org/licenses/lgpl-3.0.en.html diff --git a/categories.yml b/reference/categories.yml index acbb5dc..acbb5dc 100644 --- a/categories.yml +++ b/reference/categories.yml diff --git a/characters.yml b/reference/characters.yml index 0fd52ff..0fd52ff 100644 --- a/characters.yml +++ b/reference/characters.yml diff --git a/fonts.yml b/reference/fonts.yml index b372441..b372441 100644 --- a/fonts.yml +++ b/reference/fonts.yml diff --git a/mini/0x7FXX.pbm b/reference/mini/0x7FXX.pbm Binary files differindex 263c51f..263c51f 100644 --- a/mini/0x7FXX.pbm +++ b/reference/mini/0x7FXX.pbm diff --git a/mini/0xE5XX.pbm b/reference/mini/0xE5XX.pbm Binary files differindex 1aa49c3..1aa49c3 100644 --- a/mini/0xE5XX.pbm +++ b/reference/mini/0xE5XX.pbm diff --git a/mini/0xE6XX.pbm b/reference/mini/0xE6XX.pbm Binary files differindex f3c8aff..f3c8aff 100644 --- a/mini/0xE6XX.pbm +++ b/reference/mini/0xE6XX.pbm diff --git a/mini/0xXX.pbm b/reference/mini/0xXX.pbm Binary files differindex 623e67e..623e67e 100644 --- a/mini/0xXX.pbm +++ b/reference/mini/0xXX.pbm diff --git a/normal/0x7FXX.pbm b/reference/normal/0x7FXX.pbm Binary files differindex b441b50..b441b50 100644 --- a/normal/0x7FXX.pbm +++ b/reference/normal/0x7FXX.pbm diff --git a/normal/0xE5XX.pbm b/reference/normal/0xE5XX.pbm Binary files differindex 47d849c..47d849c 100644 --- a/normal/0xE5XX.pbm +++ b/reference/normal/0xE5XX.pbm diff --git a/normal/0xE6XX.pbm b/reference/normal/0xE6XX.pbm Binary files differindex ef48f89..ef48f89 100644 --- a/normal/0xE6XX.pbm +++ b/reference/normal/0xE6XX.pbm diff --git a/normal/0xE7XX.pbm b/reference/normal/0xE7XX.pbm Binary files differindex d8962d4..d8962d4 100644 --- a/normal/0xE7XX.pbm +++ b/reference/normal/0xE7XX.pbm diff --git a/normal/0xXX.pbm b/reference/normal/0xXX.pbm Binary files differindex d5a8251..d5a8251 100644 --- a/normal/0xXX.pbm +++ b/reference/normal/0xXX.pbm diff --git a/sets.yml 
#!/usr/bin/env python3
# tools/fontcharacter.py
"""Loader for the FONTCHARACTER reference.

The reference is a directory holding `sets.yml`, `categories.yml` and
`characters.yml`. `Reference` reads them and exposes the character sets,
with characters propagated from parent sets to their descendants.
"""
import os
import string
import sys

#*****************************************************************************#
# Helpers                                                                     #
#*****************************************************************************#
# Characters allowed in an `id` field.
_ID_ALPHABET = frozenset(string.ascii_letters + string.digits)


def sanitize_id(s):
    """Return `s` with everything but ASCII letters and digits removed."""
    return ''.join(ch for ch in s if ch in _ID_ALPHABET)


def _read_yaml(path):
    """Parse the YAML document stored at `path` and return its content."""
    # PyYAML is only needed when a reference is actually read from disk, so
    # it is imported here rather than at module level.
    import yaml

    # Binary mode: PyYAML detects the encoding itself (UTF-8 unless a BOM
    # says otherwise) instead of depending on the locale. The `with` block
    # closes the file even when parsing fails.
    with open(path, 'rb') as stream:
        # safe_load only builds plain Python data, which is all this module
        # reads; a bare yaml.load() is unsafe and is rejected by PyYAML >= 6
        # when no Loader is given.
        return yaml.safe_load(stream)


def _code_list(value):
    """Normalise a `multi`/`unicode` field to a list of codes.

    A non-empty list has its string items parsed as hexadecimal and its
    other items kept untouched; a lone integer becomes a one-item list.
    Anything else (missing field, empty list, ...) yields None.
    """
    if isinstance(value, list) and value:
        return [int(x, 16) if isinstance(x, str) else x for x in value]
    # bool is a subclass of int but is not a character code.
    if isinstance(value, int) and not isinstance(value, bool):
        return [value]
    return None


#*****************************************************************************#
# Reference class                                                             #
#*****************************************************************************#
class Reference:
    """In-memory view of a reference directory.

    Attributes:
      sets        -- dict mapping a set id to a dict with the keys
                     'description', 'characters' (code -> character),
                     'parent', 'kids' and, after a full load, 'leading'
                     (the set of `code >> 8` values of its characters).
      default_set -- id of the set flagged `default`, '' if there is none.
      categories  -- dict mapping a category path ('a/b/c') to its
                     {'prefix', 'suffix'}; only set by a full load.

    A character is either a real definition (`_pr` == 0, with 'name' and
    optionally 'multi', 'unicode', 'ascii', 'id') or a pointer
    {'inherit': <set id>, '_pr': <distance>} to the set that defines it.
    """

    def __init__(self, path, sets_only=False):
        """Load the reference stored in the directory `path`.

        With `sets_only`, only `sets.yml` is read: the sets are known but
        hold no characters, and `categories` is not set.
        """
        self.__load_sets(_read_yaml(os.path.join(path, 'sets.yml')))
        if sets_only:
            return

        # Categories must be known before characters: they contribute to
        # the complete character names.
        self.categories = {}
        for category in _read_yaml(os.path.join(path, 'categories.yml')):
            self.__explore_category(category, '', '', '')

        self.__load_characters(
            _read_yaml(os.path.join(path, 'characters.yml')))

        # Gather leaders [TODO: `leading` field?]
        for st in self.sets.values():
            st['leading'] = {code >> 8 for code in st['characters']}

    def __explore_category(self, c, cat_id, prefix, suffix):
        """Register category `c` and, recursively, its subcategories.

        `cat_id` is the path prefix inherited from the parents ('' at top
        level); `prefix` and `suffix` are the name parts accumulated so far.
        """
        cat_id += c['id']

        # A category may lack either part; only text contributes. The
        # innermost prefix comes first, the innermost suffix last.
        own_prefix = c.get('prefix')
        if isinstance(own_prefix, str):
            prefix = own_prefix + ' ' + prefix
        own_suffix = c.get('suffix')
        if isinstance(own_suffix, str):
            suffix = suffix + ' ' + own_suffix

        self.categories[cat_id] = {'prefix': prefix, 'suffix': suffix}

        for sub in c.get('sub') or ():
            self.__explore_category(sub, cat_id + '/', prefix, suffix)

    def __load_sets(self, raw):
        """Build `sets` and `default_set` from the parsed `sets.yml`."""
        self.default_set = ''
        self.sets = {}

        # Children are collected on the side first, because a set may name a
        # parent that only appears later in the file.
        kids = {}
        for s in raw:
            self.sets[s['id']] = {
                'description': s['description'],
                'characters': {},
                'parent': s.get('parent'),
                'kids': []}
            if s.get('default'):
                self.default_set = s['id']
            if s.get('parent'):
                kids.setdefault(s['parent'], []).append(s['id'])

        for parent, children in kids.items():
            self.sets[parent]['kids'] += children

    def __inherit_character(self, set_id, code, inherit, pr):
        """Make set `set_id` inherit character `code` from set `inherit`.

        `pr` is the priority: the distance between the two sets, the lower
        the stronger. An entry is only replaced by a strictly stronger one,
        so a set's own definition (priority 0) always wins.
        """
        chars = self.sets[set_id]['characters']
        current = chars.get(code)
        if current and current['_pr'] <= pr:
            # Something at least as close is already there; it has been
            # propagated to the descendants as well, so stop here.
            return

        chars[code] = {'inherit': inherit, '_pr': pr}
        for kid in self.sets[set_id]['kids']:
            self.__inherit_character(kid, code, inherit, pr + 1)

    def __load_characters(self, raw):
        """Fill the sets from the parsed `characters.yml`."""
        for entry in raw:
            # Complete name: category prefix + name + category suffix.
            name = entry['name']
            if entry.get('category'):
                cat = self.categories[entry['category']]
                name = cat['prefix'] + name + cat['suffix']

            set_id = entry.get('set', self.default_set)
            code = entry['code']
            char = {'name': name, '_pr': 0}

            # Optional code sequences.
            for field in ('multi', 'unicode'):
                codes = _code_list(entry.get(field))
                if codes is not None:
                    char[field] = codes

            # Explicit `ascii`, or the unicode sequence when every code
            # point is below 0x100.
            if entry.get('ascii'):
                char['ascii'] = entry['ascii']
            elif char.get('unicode') \
            and all(0x00 <= cp <= 0xFF for cp in char['unicode']):
                char['ascii'] = ''.join(map(chr, char['unicode']))

            # Explicit `id`, else the sanitized ascii, else the sanitized
            # name. Multi-characters left without an id get one in the
            # second pass below.
            if entry.get('id'):
                char['id'] = entry['id']
            elif char.get('ascii'):
                char['id'] = sanitize_id(char['ascii'])
            if not char.get('id') and not char.get('multi'):
                char['id'] = sanitize_id(char['name'])

            # Store it, and hand it down to the descendants of the set.
            target = self.sets[set_id]
            target['characters'][code] = char
            for kid in target['kids']:
                self.__inherit_character(kid, code, set_id, 1)

        # Second pass: every component is known now, so the remaining ids
        # can be deduced.
        for set_id, st in self.sets.items():
            for code in st['characters']:
                self.__deduce_character_id(set_id, code)

    def __deduce_character_id(self, set_id, code):
        """Give character `code` of set `set_id` an id if it lacks one.

        Inherited entries and characters that already have an id are left
        alone. A multi-character gets the concatenation of the ids of its
        components; any other character gets its sanitized name.
        """
        char = self.sets[set_id]['characters'][code]
        if char['_pr'] > 0 or char.get('id'):
            return
        if not char.get('multi'):
            char['id'] = sanitize_id(char.get('name'))
            return

        parts = []
        for num in char['multi']:
            owner = set_id
            part = self.sets[owner]['characters'][num]
            if part['_pr'] > 0:
                # Inherited component: use the real definition.
                owner = part['inherit']
                part = self.sets[owner]['characters'][num]
            if part.get('multi'):
                # The component may itself still be waiting for its id.
                self.__deduce_character_id(owner, num)
            parts.append(part['id'])
        char['id'] = ''.join(parts)

    def list(self):
        """Return the ids of the sets, the default set (if any) first."""
        others = [set_id for set_id in self.sets
                  if set_id != self.default_set]
        if self.default_set in self.sets:
            return [self.default_set] + others
        # No set is flagged as default: there is nothing to put first.
        return others

    def get(self, id=None):
        """Return the set called `id` (the default set when `id` is not a
        string), with its own id stored under the 'id' key.

        Raises KeyError when there is no such set.
        """
        if not isinstance(id, str):
            id = self.default_set
        st = self.sets[id]
        st['id'] = id
        return st


# ----------------------------------------------------------------------------
# tools/listsets.py
# ----------------------------------------------------------------------------
from argparse import ArgumentParser


def main():
    """Print the ids of the reference sets, one per line, default first."""
    # Imported at call time so that merely importing this script neither
    # needs the reference loader nor does any work.
    from fontcharacter import Reference

    #*************************************************************************#
    # Parse arguments                                                         #
    #*************************************************************************#
    ap = ArgumentParser(description="FONTCHARACTER reference sets lister")
    ap.add_argument('--refpath', help='The reference path.',
        default=os.getcwd())
    args = ap.parse_args()

    #*************************************************************************#
    # Main things                                                             #
    #*************************************************************************#
    # Obtain the reference (the sets are enough for a listing).
    ref = Reference(args.refpath, True)

    # List the sets.
    print('\n'.join(ref.list()))


if __name__ == '__main__':
    main()

# End of file.
#!/usr/bin/env python3
# tools/makebin.py
"""Generate one binary `.set` file per set of the FONTCHARACTER reference."""
import os
import shutil
from argparse import ArgumentParser

#*****************************************************************************#
# Function for encoding                                                       #
#*****************************************************************************#
# Start of every file: the magic string followed by the byte 0x01
# (presumably a format version -- to be confirmed against FORMAT.md).
_HEADER_PREFIX = b"CASIOFC\x7F" + bytes([0x01])


def _be32(value):
    """Return `value` as four big-endian bytes."""
    return value.to_bytes(4, 'big')


def encode_set(ref, fset):
    """Return the binary encoding of the set `fset`, as bytes.

    ref  -- the reference; only `ref.get(set_id)['characters']` is used, to
            resolve the characters `fset` inherits from another set.
    fset -- the set: a dict with a 'characters' dict (code -> character)
            and, optionally, a 'leading' collection of lead bytes.

    Layout of the result, all integers big-endian:
      header (24 bytes): magic and 0x01 (9), number of leading entries (1),
          number of characters (2), checksum (4), total length (4),
          length of the data pool (4);
      leading table: 4 bytes per entry -- lead byte, 0, and on 2 bytes the
          index of the first character having this lead byte;
      character table: 12 bytes per entry -- lead byte, low byte, unicode
          size, multi size, unicode offset (4), multi offset (4);
      data pool: for each character, its multi bytes then its UTF-8 text.
    The checksum is the sum of every byte following the header, modulo
    2**32. Offsets are relative to the start of the data pool; absent data
    has offset 0 and size 0.

    Raises ValueError or OverflowError when a value does not fit its field.
    """
    data = bytearray()
    entries = bytearray()
    leading = {}
    characters = fset['characters']

    # Encode characters
    for index, (code, char) in enumerate(characters.items()):
        if char['_pr'] > 0:
            # Inherited: encode the real definition, under this code.
            char = ref.get(char['inherit'])['characters'][code]

        mul_off = mul_sz = 0
        if char.get('multi'):
            mul_off = len(data)
            mul = []
            # NB: the components need their own loop variable; reusing
            # `code` here would make the entry below describe the last
            # component instead of the character itself.
            for part in char['multi']:
                mul += [part >> 8, part & 0xFF] if part > 0xFF else [part]
            mul_sz = len(mul)
            data += bytes(mul)

        uni_off = uni_sz = 0
        if char.get('unicode'):
            uni_off = len(data)
            uni = ''.join(map(chr, char['unicode'])).encode('utf-8')
            uni_sz = len(uni)
            data += uni

        lead = code >> 8
        main = code & 0xFF
        # Remember the first character met for each lead byte.
        leading.setdefault(lead, index)

        entries += bytes([lead, main, uni_sz, mul_sz])
        entries += _be32(uni_off) + _be32(mul_off)

    char_count = len(characters)

    # Correct the leading: a declared lead byte without any character
    # points just past the last character.
    for lead in fset.get('leading', ()):
        leading.setdefault(lead, char_count)

    # Make the leading tab.
    table = bytearray()
    for lead, off in leading.items():
        table += bytes([lead, 0, off >> 8, off & 0xFF])

    payload = bytes(table) + bytes(entries) + bytes(data)

    # Make the main header. The leading count is taken once the table is
    # complete, so that it always matches the number of entries written.
    header = _HEADER_PREFIX + bytes([len(leading)]) \
        + char_count.to_bytes(2, 'big')
    csum = sum(payload) & 0xFFFFFFFF
    # 12 more header bytes (checksum, length, data length) are still to come.
    length = len(header) + 12 + len(payload)
    header += _be32(csum) + _be32(length) + _be32(len(data))

    return header + payload


#*****************************************************************************#
# Main things                                                                 #
#*****************************************************************************#
def main():
    """Write `<set id>.set` for every set of the reference.

    The output path is removed first if it exists, then created anew.
    """
    # Imported at call time so that encode_set() can be imported on its own
    # and importing this script does no work.
    from fontcharacter import Reference

    # Parse arguments
    ap = ArgumentParser(description="FONTCHARACTER reference binary generator")
    ap.add_argument('--output', '-o', help='The output directory path.',
        default=os.path.join(os.getcwd(), 'generated_sets'))
    ap.add_argument('--refpath', help='The reference path.',
        default=os.getcwd())
    args = ap.parse_args()

    # Obtain the reference.
    ref = Reference(args.refpath)

    # Make the directory.
    if os.path.isdir(args.output):
        shutil.rmtree(args.output)
    elif os.path.exists(args.output):
        os.remove(args.output)
    os.makedirs(args.output)

    # For each set, make the file.
    for set_name in ref.list():
        with open(os.path.join(args.output, set_name + '.set'), 'wb') as f:
            f.write(encode_set(ref, ref.get(set_name)))


if __name__ == '__main__':
    main()

# End of file.