Skip to content

Commit feb6a72

Browse files
committed
new lexer : SINEX file format
1 parent d8d5ef9 commit feb6a72

10 files changed

Lines changed: 597 additions & 0 deletions

include/SciLexer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@
153153
#define SCLEX_DART 138
154154
#define SCLEX_ZIG 139
155155
#define SCLEX_NIX 140
156+
#define SCLEX_SINEX 141
156157
#define SCLEX_AUTOMATIC 1000
157158
#define SCE_P_DEFAULT 0
158159
#define SCE_P_COMMENTLINE 1
@@ -2188,6 +2189,13 @@
21882189
#define SCE_NIX_KEYWORD3 14
21892190
#define SCE_NIX_KEYWORD4 15
21902191
#define SCE_NIX_STRINGEOL 16
2192+
#define SCE_SINEX_DEFAULT 0
2193+
#define SCE_SINEX_COMMENTLINE 1
2194+
#define SCE_SINEX_BLOCK_START 2
2195+
#define SCE_SINEX_BLOCK_END 3
2196+
#define SCE_SINEX_DATE 4
2197+
#define SCE_SINEX_NUMBER 5
2198+
#define SCE_SINEX_STRING 6
21912199
/* --Autogenerated -- end of section automatically generated from Scintilla.iface */
21922200

21932201
#endif

lexers/LexSINEX.cxx

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
// Scintilla source code edit control
2+
// Encoding: UTF-8
3+
/** @file LexSINEX.cxx
4+
** Lexer for SINEX (Solution INdependent EXchange format) files
5+
** https://www.iers.org/SharedDocs/Publikationen/EN/IERS/Documents/ac/sinex/sinex_v202_pdf.pdf
6+
**
7+
** Written by Franck Reinquin
8+
**/
9+
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
10+
// The License.txt file describes the conditions under which this software may be distributed.
11+
12+
#include <stdlib.h>
13+
#include <string.h>
14+
#include <stdio.h>
15+
#include <stdarg.h>
16+
#include <assert.h>
17+
#include <ctype.h>
18+
19+
#include <string>
20+
21+
#include "ILexer.h"
22+
#include "Scintilla.h"
23+
#include "SciLexer.h"
24+
25+
#include "LexAccessor.h"
26+
#include "Accessor.h"
27+
#include "CharacterSet.h"
28+
#include "LexerModule.h"
29+
30+
using namespace Lexilla;
31+
32+
// States of the finite-state machine that recognises a real number
// (see IsSINEXNumber). Values follow the order in which the parts of a
// number may appear.
typedef enum {
	NOTHING    = 0,	// parsing has not started yet
	SIGN_1     = 1,	// leading sign (+/-) of the mantissa was read
	MANTISSA_1 = 2,	// one or more digits were read before any decimal point
	DOT        = 3,	// the decimal point was read
	MANTISSA_2 = 4,	// one or more digits were read after the decimal point
	D_OR_E     = 5,	// the scientific notation letter (e,E,d,D) was read
	SIGN_2     = 6,	// sign (+/-) of the exponent was read
	EXPONENT   = 7	// one or more exponent digits were read
} E_REAL_PARSING_STATE;
43+
44+
45+
// Check if end of line encountered. Possible terminators : '\n', '\r', '\r\n'
46+
// For '\r\n' terminators, the EOL is reached at the '\n' character.
47+
static inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
48+
return (styler[i] == '\n') ||
49+
((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
50+
}
51+
52+
53+
// Tell whether a line is a comment line, judging by the style already
// assigned to the first character of that line.
static inline bool IsCommentLine(Accessor &styler, Sci_Position line) {
	const Sci_Position firstChar = styler.LineStart(line);
	return SCE_SINEX_COMMENTLINE == styler.StyleAt(firstChar);
}
57+
58+
59+
// Check whether the string is number, either integer or float ; the
60+
// scientific representation is also accepted.
61+
// Implemented as a finite-state machine
62+
// Mostly equivalent to REGEX : ^[+-]?\d*\.?\d+([eE][+-]?\d+)?$
63+
static inline bool IsSINEXNumber(const char *text, Sci_PositionU len) {
64+
65+
E_REAL_PARSING_STATE parsingState = NOTHING ;
66+
bool firstDigitsFound = false;
67+
68+
if (len == 0)
69+
return false;
70+
71+
for (Sci_PositionU i = 0 ; i < len ; i++) {
72+
if ((text[i] == '-') || (text[i] == '+')) {
73+
// valid only at the beginning and after an exponent letter
74+
if (parsingState == NOTHING) {
75+
parsingState = SIGN_1 ;
76+
} else if (parsingState == D_OR_E) {
77+
parsingState = SIGN_2;
78+
} else {
79+
return false;
80+
}
81+
82+
} else if (text[i] == '.') {
83+
// valid only after the first digits, which can be absent (e.g. '-.12')
84+
if ((parsingState == NOTHING) || (parsingState == SIGN_1) \
85+
|| (parsingState == MANTISSA_1)) {
86+
parsingState = DOT;
87+
} else {
88+
return false;
89+
}
90+
91+
} else if ((text[i] == 'e') || (text[i] == 'E') || (text[i] == 'd') \
92+
|| (text[i] == 'D')) {
93+
// valid only after the first digits ('.e+7' is NOK)
94+
if (! firstDigitsFound) return false;
95+
if ((parsingState == MANTISSA_1) || (parsingState == MANTISSA_2) \
96+
|| (parsingState == DOT)) {
97+
parsingState = D_OR_E;
98+
} else {
99+
return false;
100+
}
101+
102+
} else if (isdigit(text[i])) {
103+
if ((parsingState == NOTHING) || (parsingState == SIGN_1) \
104+
|| (parsingState == MANTISSA_1)) {
105+
parsingState = MANTISSA_1;
106+
firstDigitsFound = true;
107+
} else if ((parsingState == DOT) || (parsingState == MANTISSA_2)) {
108+
parsingState = MANTISSA_2;
109+
firstDigitsFound = true;
110+
} else if ((parsingState == D_OR_E) || (parsingState == SIGN_2) \
111+
|| (parsingState == EXPONENT)) {
112+
parsingState = EXPONENT;
113+
} else {
114+
return false;
115+
}
116+
117+
} else {
118+
// other characters are not valid
119+
return false ;
120+
}
121+
}
122+
return (firstDigitsFound && (parsingState != D_OR_E) && (parsingState != SIGN_2));
123+
}
124+
125+
126+
// Check whether the string is a SINEX date (YY:DDD:SSSSS)
127+
// For the record : YY = 2-digit year (!), DDD = Day Of Year, SSSSS = seconds
128+
// in the day
129+
static inline bool IsSINEXDate(const char *text, Sci_PositionU len) {
130+
131+
if (len < 11) return false;
132+
return (IsADigit(text[0]) && IsADigit(text[1]) && text[2] == ':' &&
133+
IsADigit(text[3]) && IsADigit(text[4]) && IsADigit(text[5]) && text[6] == ':' &&
134+
IsADigit(text[7]) && IsADigit(text[8]) && IsADigit(text[9]) &&
135+
IsADigit(text[10]) && IsADigit(text[11]));
136+
}
137+
138+
// Find next space in the string : return an offset in the string >= start
139+
// or len if not found
140+
static Sci_PositionU FindNextSpace(const char *text,
141+
Sci_PositionU start, Sci_PositionU len) {
142+
Sci_PositionU pos ;
143+
for (pos = start ; pos < len ; pos ++ ) {
144+
if (IsASpace(text[pos]))
145+
return pos;
146+
}
147+
return pos;
148+
}
149+
150+
// Colourization logic for one line
151+
static void ColouriseSinexLine(
152+
const char *lineBuffer,
153+
Sci_PositionU lengthLine,
154+
Sci_PositionU startLine,
155+
Sci_PositionU endPos,
156+
Accessor &styler) {
157+
158+
if (lengthLine <= 0)
159+
return;
160+
161+
// comment line
162+
if (lineBuffer[0] == '*') {
163+
styler.ColourTo(endPos, SCE_SINEX_COMMENTLINE);
164+
165+
// block start (+BLOCK_NAME)
166+
} else if (lineBuffer[0] == '+') {
167+
styler.ColourTo(endPos, SCE_SINEX_BLOCK_START);
168+
169+
// block end (-BLOCK_NAME)
170+
} else if (lineBuffer[0] == '-') {
171+
styler.ColourTo(endPos, SCE_SINEX_BLOCK_END);
172+
173+
// Other lines : parse content
174+
} else {
175+
Sci_PositionU i = 0 ;
176+
Sci_PositionU nextSpace ;
177+
178+
// process word by word
179+
while ((nextSpace = FindNextSpace(lineBuffer,i,lengthLine)) < lengthLine) {
180+
// Detect dates YY:DDD:SSSSS (first test aims at speeding up detection)
181+
if (IsADigit(lineBuffer[i]) && IsSINEXDate(&lineBuffer[i], nextSpace-i)) {
182+
styler.ColourTo(startLine+nextSpace-1, SCE_SINEX_DATE);
183+
// Numbers (integers or floats, including scientific notation)
184+
} else if (IsSINEXNumber(&lineBuffer[i], nextSpace-i)) {
185+
styler.ColourTo(startLine+nextSpace-1, SCE_SINEX_NUMBER);
186+
}
187+
// consume all spaces
188+
for (i=nextSpace ; (i < lengthLine) && IsASpace(lineBuffer[i]) ; i++)
189+
;
190+
styler.ColourTo(startLine+i-1, SCE_SINEX_DEFAULT);
191+
}
192+
styler.ColourTo(endPos, SCE_SINEX_DEFAULT);
193+
}
194+
}
195+
196+
197+
// Colourization logic for a whole area
198+
// The area is split into lines which are separately processed
199+
static void ColouriseSinexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) {
200+
// initStyle not needed as each line is independent
201+
std::string lineBuffer;
202+
styler.StartAt(startPos);
203+
styler.StartSegment(startPos);
204+
Sci_PositionU startLine = startPos;
205+
206+
for (Sci_PositionU i = startPos; i < startPos + length; i++) {
207+
lineBuffer.push_back(styler[i]);
208+
if (AtEOL(styler, i)) {
209+
// End of line (or of line buffer) met, colourise it
210+
ColouriseSinexLine(lineBuffer.c_str(), lineBuffer.length(), startLine, i, styler);
211+
lineBuffer.clear();
212+
startLine = i + 1;
213+
}
214+
}
215+
if (!lineBuffer.empty()) { // Last line does not have ending characters
216+
ColouriseSinexLine(lineBuffer.c_str(), lineBuffer.length(), startLine, startPos + length - 1, styler);
217+
}
218+
}
219+
220+
221+
// Folding logic
222+
static void FoldSinexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) {
223+
224+
if (AtEOL(styler, startPos))
225+
return ;
226+
227+
bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
228+
Sci_PositionU endPos = startPos + length;
229+
int styleCurrent;
230+
231+
// One-line comments are not folded, multi-line comments may be folded
232+
// (see "fold.comment" property) : we need to check the lines before the
233+
// first line.
234+
// Possible cases :
235+
// * start of a comment block : first line = comment, 1 comment line before
236+
// * continuation of a comment block : first line = comment , >1 comment
237+
// lines before
238+
// * end of a comment line : first line != comment, >1 comment lines before
239+
Sci_Position lineCurrent = styler.GetLine(startPos);
240+
int nbCommentLines = 0 ;
241+
while (lineCurrent > 0) {
242+
if (!IsCommentLine(styler, lineCurrent-1))
243+
break ;
244+
nbCommentLines++;
245+
lineCurrent--;
246+
}
247+
248+
// Go back to the start of the comment block, if any. Level at that line
249+
// is known
250+
Sci_Position newStartPos = (nbCommentLines == 0) ? startPos : styler.LineStart(lineCurrent);
251+
252+
// Now go through the provided text
253+
int levelCurrent = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
254+
int levelPrev = levelCurrent;
255+
int levelNext = levelCurrent;
256+
int indComment = 0;
257+
258+
for (Sci_PositionU i = newStartPos; i < endPos; i++) {
259+
if (AtEOL(styler, i)) {
260+
levelCurrent = levelNext ;
261+
styleCurrent = styler.StyleAt(i);
262+
if (foldComment) {
263+
if (styleCurrent == SCE_SINEX_COMMENTLINE) {
264+
indComment++ ;
265+
if (indComment==2) {
266+
// second comment line ->increase level
267+
// (do nothing for single comment lines)
268+
levelCurrent++;
269+
}
270+
} else {
271+
// not a comment line : decrease level if it follows a
272+
// multi-line comment
273+
if (indComment >= 2) {
274+
levelCurrent--;
275+
}
276+
indComment = 0;
277+
}
278+
levelNext = levelCurrent ;
279+
}
280+
switch (styleCurrent) {
281+
case SCE_SINEX_BLOCK_START:
282+
levelNext++;
283+
break;
284+
case SCE_SINEX_BLOCK_END:
285+
levelNext--;
286+
break;
287+
}
288+
styler.SetLevel(lineCurrent, levelCurrent);
289+
290+
// now update previous line state (if header)
291+
if (levelCurrent > levelPrev) {
292+
int lev = levelPrev;
293+
lev |= SC_FOLDLEVELHEADERFLAG;
294+
//lev |= 1<<16;
295+
if (lev != styler.LevelAt(lineCurrent-1)) {
296+
styler.SetLevel(lineCurrent-1, lev);
297+
}
298+
}
299+
lineCurrent++;
300+
levelPrev = levelCurrent ;
301+
}
302+
303+
}
304+
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
305+
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
306+
styler.SetLevel(lineCurrent, levelNext | flagsNext);
307+
}
308+
309+
// Null-terminated list of word list descriptions handed to the lexer module
static const char * const sinexWordListDesc[] = { "SNX", 0 };
313+
314+
// Lexer module for SINEX : ties the SCLEX_SINEX identifier and the "sinex"
// name to the colouring and folding functions of this file
extern const LexerModule lmSINEX(
	SCLEX_SINEX,
	ColouriseSinexDoc,
	"sinex",
	FoldSinexDoc,
	sinexWordListDesc);

src/Lexilla.cxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ extern const LexerModule lmRuby;
137137
extern const LexerModule lmRust;
138138
extern const LexerModule lmSAS;
139139
extern const LexerModule lmScriptol;
140+
extern const LexerModule lmSINEX;
140141
extern const LexerModule lmSmalltalk;
141142
extern const LexerModule lmSML;
142143
extern const LexerModule lmSorc;
@@ -290,6 +291,7 @@ void AddEachLexer() {
290291
&lmRust,
291292
&lmSAS,
292293
&lmScriptol,
294+
&lmSINEX,
293295
&lmSmalltalk,
294296
&lmSML,
295297
&lmSorc,
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
%=SNX 2.01 IGN 16:122:00000 IGN 79:215:00000 19:001:00000 C 00036 2 X V
2+
*-------------------------------------------------------------------------------
3+
+FILE/COMMENT
4+
* File created by CATREF software (Z.Altamimi)
5+
* 2-line comment
6+
-FILE/COMMENT
7+
*-------------------------------------------------------------------------------
8+
* out-of-block multi-line comment
9+
*-------------------------------------------------------------------------------
10+
+SITE/ID
11+
*CODE PT __DOMES__ T _STATION DESCRIPTION__ APPROX_LON_ APPROX_LAT_ _APP_H_
12+
PERT A 50133M001 Perth, Australia 115 53 06.9 -31 48 07.0 12.7
13+
TIDB A 50103M108 Tidbinbilla, NSW, Aust 148 58 47.9 -35 23 57.1 665.3
14+
YAR1 A 50107M004 Mingenew, Australia 115 20 49.1 -29 02 47.5 241.3
15+
CEDU A 50138M001 CEDU 50138M001 133 48 35.3 -31 51 59.9 144.7
16+
HOB2 A 50116M004 Hobart/Tasmania, Austr 147 26 19.4 -42 48 16.9 41.1
17+
-SITE/ID
18+
* out-of-block single line comment
19+
+SOLUTION/EPOCHS
20+
*Code PT SOLN T Data_start__ Data_end____ Mean_epoch__
21+
PERT A 1 C 94:004:00000 94:113:00000 94:058:43200
22+
TIDB A 1 C 94:004:00000 96:177:86389 95:090:86394
23+
YAR1 A 1 C 94:004:00000 97:230:00000 95:300:00000
24+
PERT A 2 C 94:117:00000 01:030:00000 97:256:00000
25+
CEDU A 1 C 94:136:00000 95:277:00000 95:024:00000
26+
HOB2 A 1 C 94:187:00000 97:137:00000 95:345:00000
27+
-SOLUTION/EPOCHS
28+
+SOLUTION/ESTIMATE
29+
*INDEX TYPE__ CODE PT SOLN _REF_EPOCH__ UNIT S __ESTIMATED VALUE____ _STD_DEV___
30+
1 STAX PERT A 1 10:001:00000 m 2 -.236868757866128E+07 0.74048E-03
31+
2 STAY PERT A 1 10:001:00000 m 2 0.488131661461247E+07 0.91864E-03
32+
3 STAZ PERT A 1 10:001:00000 m 2 -.334179548710761E+07 0.84688E-03
33+
4 VELX PERT A 1 10:001:00000 m/y 2 -.472916775593191E-01 0.39163E-04
34+
5 VELY PERT A 1 10:001:00000 m/y 2 0.822689567578196E-02 0.41751E-04
35+
6 VELZ PERT A 1 10:001:00000 m/y 2 0.508006951054042E-01 0.44388E-04
36+
29 VELY CEDU A 1 10:001:00000 m/y 2 0.172935957701062E-02 0.38085E-04
37+
30 VELZ CEDU A 1 10:001:00000 m/y 2 0.504090658972641E-01 0.42917E-04
38+
31 STAX HOB2 A 1 10:001:00000 m 2 -.395007186683046E+07 0.68502E-03
39+
32 STAY HOB2 A 1 10:001:00000 m 2 0.252241528744047E+07 0.60906E-03
40+
33 STAZ HOB2 A 1 10:001:00000 m 2 -.431163782526727E+07 0.78320E-03
41+
34 VELX HOB2 A 1 10:001:00000 m/y 2 -.387112208023018E-01 0.40249E-04
42+
35 VELY HOB2 A 1 10:001:00000 m/y 2 0.792637786129052E-02 0.37662E-04
43+
36 VELZ HOB2 A 1 10:001:00000 m/y 2 0.412562272254610E-01 0.45803E-04
44+
-SOLUTION/ESTIMATE

0 commit comments

Comments
 (0)