diff options
author | Thomas "Cakeisalie5" Touhey <thomas@touhey.fr> | 2017-04-18 13:26:32 +0200 |
---|---|---|
committer | Thomas "Cakeisalie5" Touhey <thomas@touhey.fr> | 2017-04-18 13:26:32 +0200 |
commit | 2b1ddb4e1c06fe478eda19549aa9197b3f3b0290 (patch) | |
tree | e763ee7348f41b8b7f1823bf20daa9f42f8f39b9 | |
parent | 613ea4731051bf2bcb6b5c61dc4e719d87e6abd0 (diff) |
Continued contributing to umachine.h
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | include/builtin.h | 7 | ||||
-rw-r--r-- | include/umachine.h | 252 |
3 files changed, 222 insertions, 42 deletions
@@ -2,4 +2,9 @@ /obj /lib*.a +# that's my test +/src/ptdr.c + +# and that's temporary files nobody wants +.DS_Store .*.swp diff --git a/include/builtin.h b/include/builtin.h index 34de5c5..9dc454f 100644 --- a/include/builtin.h +++ b/include/builtin.h @@ -79,12 +79,7 @@ extern int _builtin_macl(int *__x, int *__y, unsigned int __count); extern int _builtin_macll(int *__x, int *__y, unsigned int __count, unsigned int __mask); -# if __HITACHI_VERSION__ == 0x0600 -extern int _builtin_trapa_svc(...); -# else /* 0x0904 */ -extern int _builtin_trapa_svc(int __a, int __b, ...); -# endif - +extern int _builtin_trapa_svc(int __code, int __r0, ...); extern void _builtin_prefetch(void *__x); extern void _builtin_set_fpscr(int __x); diff --git a/include/umachine.h b/include/umachine.h index 93e7473..25e71bc 100644 --- a/include/umachine.h +++ b/include/umachine.h @@ -189,8 +189,10 @@ static __inline void __tst_gbr_byte(uint32_t __offset, uint8_t __byte) { return (__mul); } __fn__; })(_PTR1, _PTR2, _MASK) # endif /* ************************************************************************** */ -/* trapa with arguments */ +/* OS System Call */ /* ************************************************************************** */ +/* Notice that CASIOWIN (CASIO's system) doesn't use this interface, so this + * will not work with them. */ # if defined(__HITACHI__) # define trapa_svc _builtin_trapa_svc @@ -272,20 +274,23 @@ static __inline void __tst_gbr_byte(uint32_t __offset, uint8_t __byte) { (__VA_ARGS__) # endif /* ************************************************************************** */ -/* ??? */ +/* Prefetch Data to the Cache */ /* ************************************************************************** */ -/* TODO: find out */ +/* Load data to cache on software prefetching. */ # if defined(__HITACHI__) -# define prefetch _builtin_prefetch +# define prefetch(_X) _builtin_prefetch(_X) # elif defined(__GNUC__) -# define prefetch(_A, _B, ...) 
((void)0) +static __inline void __prefetch(void *addr) { + asm("pref @%0"::"r"(addr)); } + +# define prefetch(_X) __prefetch(_X) # endif /* ************************************************************************** */ /* Interact with the FPU System Register */ /* ************************************************************************** */ -/* The FPU System Register is <TODO>. */ +/* The FPU System Register is <TODO: describe it>. */ # if defined(__HITACHI__) # define set_fpscr(_CR) _builtin_set_fpscr(_CR) @@ -305,49 +310,210 @@ static __inline uint32_t __get_fpscr(void) { /* Floating-point Operations */ /* ************************************************************************** */ /* FIPR is Floating-Point Inner Product, - * FTRV is Floating-Point Transform Vector. */ + * FTRV is Floating-Point Transform Vector. + * MTRX is matrix. */ # if defined(__HITACHI__) +# define add4(_VEC1, _VEC2, _VEC3) _builtin_add4(_VEC1, _VEC2, _VEC3) +# define sub4(_VEC1, _VEC2, _VEC3) _builtin_sub4(_VEC1, _VEC2, _VEC3) + +# define ld_ext(_MAT) _builtin_ld_ext(_MAT) +# define st_ext(_MAT) _builtin_st_ext(_MAT) + # define fipr(_VEC1, _VEC2) _builtin_fipr(_VEC1, _VEC2) # define ftrv(_VEC1, _VEC2) _builtin_ftrv(_VEC1, _VEC2) # define ftrvadd(_VEC1, _VEC2, _VEC3) _builtin_ftrvadd(_VEC1, _VEC2, _VEC3) # define ftrvsub(_VEC1, _VEC2, _VEC3) _builtin_ftrvsub(_VEC1, _VEC2, _VEC3) -# elif defined(__GNUC__) -# define fipr(_VEC1, _VEC2) asm("fipr %0, %1"::"r"(vec1), "r"(vec2)) -# define ftrv(_VEC1, _VEC2) asm("ftrv %0, %1"::"r"(vec1), "r"(vec2)) -# define ftrvadd(_VEC1, _VEC2, _VEC3) ((void)0) /* TODO */ -# define ftrvsub(_VEC1, _VEC2, _VEC3) ((void)0) /* TODO */ -# endif -/* ************************************************************************** */ -/* Matrix-related things */ -/* ************************************************************************** */ -/* These things are <TODO> */ - -# if defined(__HITACHI__) # define mtrx4mul(_MAT1, _MAT2) _builtin_mtrx4mul(_MAT1, _MAT2) # define 
mtrx4muladd(_MAT1, _MAT2, _MAT3) \ _builtin_mtrx4muladd(_MAT1, _MAT2, _MAT3) # define mtrx4mulsub(_MAT1, _MAT2, _MAT3) \ _builtin_mtrx4mulsub(_MAT1, _MAT2, _MAT3) -# define ld_ext(_MAT) _builtin_ld_ext(_MAT) -# define st_ext(_MAT) _builtin_st_ext(_MAT) - -# define add4(_VEC1, _VEC2, _VEC3) _builtin_add4(_VEC1, _VEC2, _VEC3) -# define sub4(_VEC1, _VEC2, _VEC3) _builtin_sub4(_VEC1, _VEC2, _VEC3) - -# define trace(_V) _builtin_trace(_V) +# elif defined(__GNUC__) +static __inline void __add4(float __vec1[4], float __vec2[4], float __vec3[4]) { + asm("fmov.s @%0+, fr0\r\n" + "fmov.s @%0+, fr1\r\n" + "fmov.s @%0+, fr2\r\n" + "fmov.s @%0+, fr3\r\n" + "fmov.s @%1+, fr4\r\n" + "fadd fr4, fr0\r\n" + "fmov.s @%1+, fr5\r\n" + "fadd fr5, fr1\r\n" + "fmov.s @%1+, fr6\r\n" + "fadd fr6, fr2\r\n" + "fmov.s @%1+, fr7\r\n" + "fadd fr7, fr3\r\n" + "fmov.s fr3, @-%2\r\n" + "fmov.s fr2, @-%2\r\n" + "fmov.s fr1, @-%2\r\n" + "fmov.s fr0, @-%2\r\n" + :: "r"(__vec1), "r"(__vec2), "r"(&__vec3[4])); } +static __inline void __sub4(float __vec1[4], float __vec2[4], float __vec3[4]) { + asm("fmov.s @%0+, fr0\r\n" + "fmov.s @%0+, fr1\r\n" + "fmov.s @%0+, fr2\r\n" + "fmov.s @%0+, fr3\r\n" + "fmov.s @%1+, fr4\r\n" + "fsub fr4, fr0\r\n" + "fmov.s @%1+, fr5\r\n" + "fsub fr5, fr1\r\n" + "fmov.s @%1+, fr6\r\n" + "fsub fr6, fr2\r\n" + "fmov.s @%1+, fr7\r\n" + "fsub fr7, fr3\r\n" + "fmov.s fr3, @-%2\r\n" + "fmov.s fr2, @-%2\r\n" + "fmov.s fr1, @-%2\r\n" + "fmov.s fr0, @-%2\r\n" + :: "r"(__vec1), "r"(__vec2), "r"(&__vec3[4])); } +static __inline void __ld_ext(float __vec[4][4]) { + asm("frchg\r\n" + "fmov.s @%0+, fr0 \r\n" + "fmov.s @%0+, fr1 \r\n" + "fmov.s @%0+, fr2 \r\n" + "fmov.s @%0+, fr3 \r\n" + "fmov.s @%1+, fr4 \r\n" + "fmov.s @%1+, fr5 \r\n" + "fmov.s @%1+, fr6 \r\n" + "fmov.s @%1+, fr7 \r\n" + "fmov.s @%2+, fr8 \r\n" + "fmov.s @%2+, fr9 \r\n" + "fmov.s @%2+, fr10\r\n" + "fmov.s @%2+, fr11\r\n" + "fmov.s @%3+, fr12\r\n" + "fmov.s @%3+, fr13\r\n" + "fmov.s @%3+, fr14\r\n" + "fmov.s @%3+, fr15\r\n" 
+ "frchg" + :: "r"(__vec[0]), "r"(__vec[1]), "r"(__vec[2]), "r"(__vec[3])); } +static __inline void __st_ext(float __vec[4][4]) { + asm("frchg\r\n" + "fmov.s fr15, @-%3\r\n" + "fmov.s fr14, @-%3\r\n" + "fmov.s fr13, @-%3\r\n" + "fmov.s fr12, @-%3\r\n" + "fmov.s fr11, @-%2\r\n" + "fmov.s fr10, @-%2\r\n" + "fmov.s fr9 , @-%2\r\n" + "fmov.s fr8 , @-%2\r\n" + "fmov.s fr7 , @-%1\r\n" + "fmov.s fr6 , @-%1\r\n" + "fmov.s fr5 , @-%1\r\n" + "fmov.s fr4 , @-%1\r\n" + "fmov.s fr3 , @-%0\r\n" + "fmov.s fr2 , @-%0\r\n" + "fmov.s fr1 , @-%0\r\n" + "fmov.s fr0 , @-%0\r\n" + "frchg" + :: "r"(&__vec[0][4]), "r"(&__vec[1][4]), "r"(&__vec[2][4]), + "r"(&__vec[3][4])); } +static __inline void __fipr(float __vec1[4], float __vec2[4]) { + asm("fmov.s @%0+, fr0\r\n" + "fmov.s @%0+, fr1\r\n" + "fmov.s @%0+, fr2\r\n" + "fmov.s @%0+, fr3\r\n" + "fmov.s @%1+, fr4\r\n" + "fmov.s @%1+, fr5\r\n" + "fmov.s @%1+, fr6\r\n" + "fmov.s @%1+, fr7\r\n" + :: "r"(__vec1), "r"(__vec2)); } +static __inline void __ftrv(float __vec1[4], float __vec2[4]) { + asm("fmov.s @%0+, fr0\r\n" + "fmov.s @%0+, fr1\r\n" + "fmov.s @%0+, fr2\r\n" + "fmov.s @%0+, fr3\r\n" + "ftrv xmtrx, fv0\r\n" + "fmov.s fr3, @-%1\r\n" + "fmov.s fr2, @-%1\r\n" + "fmov.s fr1, @-%1\r\n" + "fmov.s fr0, @-%1\r\n" + :: "r"(__vec1), "r"(&__vec2[4])); } +static __inline void __ftrvadd(float __vec1[4], float __vec2[4], + float __vec3[4]) { + asm("fmov.s @%0+, fr0\r\n" + "fmov.s @%0+, fr1\r\n" + "fmov.s @%0+, fr2\r\n" + "fmov.s @%0+, fr3\r\n" + "ftrv xmtrx, fv0\r\n" + "fmov.s @%1+, fr4\r\n" + "fmov.s @%1+, fr5\r\n" + "fmov.s @%1+, fr6\r\n" + "fadd fr6, fr2\r\n" + "fmov.s @%1+, fr7\r\n" + "fadd fr5, fr1\r\n" + "fadd fr4, fr0\r\n" + "fadd fr7, fr3\r\n" + "fmov.s fr3, @-%2\r\n" + "fmov.s fr2, @-%2\r\n" + "fmov.s fr1, @-%2\r\n" + "fmov.s fr0, @-%2\r\n" + :: "r"(__vec1), "r"(__vec2), "r"(&__vec3[4])); } +static __inline void __ftrvsub(float __vec1[4], float __vec2[4], + float __vec3[4]) { + asm("fmov.s @%0+, fr0\r\n" + "fmov.s @%0+, fr1\r\n" + "fmov.s 
@%0+, fr2\r\n" + "fmov.s @%0+, fr3\r\n" + "ftrv xmtrx, fv0\r\n" + "fmov.s @%1+, fr4\r\n" + "fmov.s @%1+, fr5\r\n" + "fmov.s @%1+, fr6\r\n" + "fsub fr6, fr2\r\n" + "fmov.s @%1+, fr7\r\n" + "fsub fr5, fr1\r\n" + "fsub fr4, fr0\r\n" + "fsub fr7, fr3\r\n" + "fmov.s fr3, @-%2\r\n" + "fmov.s fr2, @-%2\r\n" + "fmov.s fr1, @-%2\r\n" + "fmov.s fr0, @-%2\r\n" + :: "r"(__vec1), "r"(__vec2), "r"(&__vec3[4])); } + +static __inline void __mtrx4mul(float __mat1[4][4], float __mat2[4][4]) { /* __ftrv each row of __mat1 through XMTRX into the matching row of __mat2 */ + __ftrv(__mat1[0], __mat2[0]); + __ftrv(__mat1[1], __mat2[1]); + __ftrv(__mat1[2], __mat2[2]); + __ftrv(__mat1[3], __mat2[3]); } +static __inline void __mtrx4muladd(float __mat1[4][4], float __mat2[4][4], + float __mat3[4][4]) { + __ftrvadd(__mat1[0], __mat2[0], __mat3[0]); + __ftrvadd(__mat1[1], __mat2[1], __mat3[1]); + __ftrvadd(__mat1[2], __mat2[2], __mat3[2]); + __ftrvadd(__mat1[3], __mat2[3], __mat3[3]); } +static __inline void __mtrx4mulsub(float __mat1[4][4], float __mat2[4][4], + float __mat3[4][4]) { + __ftrvsub(__mat1[0], __mat2[0], __mat3[0]); + __ftrvsub(__mat1[1], __mat2[1], __mat3[1]); + __ftrvsub(__mat1[2], __mat2[2], __mat3[2]); + __ftrvsub(__mat1[3], __mat2[3], __mat3[3]); } + +# define add4(_VEC1, _VEC2, _VEC3) __add4(_VEC1, _VEC2, _VEC3) +# define sub4(_VEC1, _VEC2, _VEC3) __sub4(_VEC1, _VEC2, _VEC3) + +# define ld_ext(_MAT) __ld_ext(_MAT) +# define st_ext(_MAT) __st_ext(_MAT) + +# define fipr(_VEC1, _VEC2) __fipr(_VEC1, _VEC2) +# define ftrv(_VEC1, _VEC2) __ftrv(_VEC1, _VEC2) +# define ftrvadd(_VEC1, _VEC2, _VEC3) __ftrvadd(_VEC1, _VEC2, _VEC3) +# define ftrvsub(_VEC1, _VEC2, _VEC3) __ftrvsub(_VEC1, _VEC2, _VEC3) + +# define mtrx4mul(_MAT1, _MAT2) __mtrx4mul(_MAT1, _MAT2) +# define mtrx4muladd(_MAT1, _MAT2, _MAT3) __mtrx4muladd(_MAT1, _MAT2, _MAT3) +# define mtrx4mulsub(_MAT1, _MAT2, _MAT3) __mtrx4mulsub(_MAT1, _MAT2, _MAT3) +# endif +/* ************************************************************************** */ +/* Trace a variable on the emulator */ +/* 
************************************************************************** */ +/* Yes, this instruction is specific to the emulator. + * Why not after all? */ +# if defined(__HITACHI__) +# define trace(_VAR) _builtin_trace(_VAR) # elif defined(__GNUC__) -# define mtrx4mul(_MAT1, _MAT2) ((void)0) /* TODO */ -# define mtrx4muladd(_MAT1, _MAT2, _MAT3) ((void)0) /* TODO */ -# define mtrx4mulsub(_MAT1, _MAT2, _MAT3) ((void)0) /* TODO */ -# define ld_ext(_MAT) ((void)0) /* TODO */ -# define st_ext(_MAT) ((void)0) /* TODO */ -# define add4(_VEC1, _VEC2, _VEC3) ((void)0) /* TODO */ -# define sub4(_VEC1, _VEC2, _VEC3) ((void)0) /* TODO */ -# define trace(_V) ((void)0) /* TODO */ +# define trace(_VAR) asm("trace %0"::"r"(_VAR)) # endif /* ************************************************************************** */ /* Do nothing */ @@ -435,8 +601,22 @@ static __inline int32_t __dmuls_l(int32_t __data1, int32_t __data2) { # define fsrra(_DATA) _builtin_fsrra(_DATA) # elif defined(__GNUC__) -# define fsca(_ANGLE, _SINV, _COSV) ((void)0) /* TODO */ -# define fsrra(_DATA) ((void)0) /* TODO */ +static __inline void __fsca(int32_t __angle, float *__sinv, float *__cosv) { + asm("lds %0, fpul\r\n" + "fsca fpul, dr0\r\n" + "fmov.s fr0, @%1\r\n" + "fmov.s fr1, @%2\r\n" + :: "r"(__angle), "r"(__sinv), "r"(__cosv)); } +static __inline float __fsrra(float __data) { + float __result = 0; + asm("fmov.s @%1, fr9\r\n" + "fsrra fr9\r\n" + "fmov.s fr9, @%0" + :: "r"(&__result), "r"(&__data)); + return (__result); } + +# define fsca(_ANGLE, _SINV, _COSV) __fsca(_ANGLE, _SINV, _COSV) +# define fsrra(_DATA) __fsrra(_DATA) # endif /* ************************************************************************** */ /* ??? (TODO) */ |