Skip to content

Commit 54d5678

Browse files
committed
new lexer : SINEX file format
1 parent 9e459ff commit 54d5678

11 files changed

+613
-0
lines changed

include/LexicalStyles.iface

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ val SCLEX_TROFF=137
149149
val SCLEX_DART=138
150150
val SCLEX_ZIG=139
151151
val SCLEX_NIX=140
152+
val SCLEX_SINEX=141
152153

153154
# When a lexer specifies its language as SCLEX_AUTOMATIC it receives a
154155
# value assigned in sequence from SCLEX_AUTOMATIC+1.
@@ -2450,3 +2451,12 @@ val SCE_NIX_KEYWORD2=13
24502451
val SCE_NIX_KEYWORD3=14
24512452
val SCE_NIX_KEYWORD4=15
24522453
val SCE_NIX_STRINGEOL=16
2454+
# Lexical states for SCLEX_SINEX
2455+
lex Sinex=SCLEX_SINEX SCE_SINEX_
2456+
val SCE_SINEX_DEFAULT=0
2457+
val SCE_SINEX_COMMENTLINE=1
2458+
val SCE_SINEX_BLOCK_START=2
2459+
val SCE_SINEX_BLOCK_END=3
2460+
val SCE_SINEX_DATE=4
2461+
val SCE_SINEX_NUMBER=5
2462+
val SCE_SINEX_STRING=6

include/SciLexer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@
154154
#define SCLEX_DART 138
155155
#define SCLEX_ZIG 139
156156
#define SCLEX_NIX 140
157+
#define SCLEX_SINEX 141
157158
#define SCLEX_AUTOMATIC 1000
158159
#define SCE_P_DEFAULT 0
159160
#define SCE_P_COMMENTLINE 1
@@ -2189,6 +2190,13 @@
21892190
#define SCE_NIX_KEYWORD3 14
21902191
#define SCE_NIX_KEYWORD4 15
21912192
#define SCE_NIX_STRINGEOL 16
2193+
#define SCE_SINEX_DEFAULT 0
2194+
#define SCE_SINEX_COMMENTLINE 1
2195+
#define SCE_SINEX_BLOCK_START 2
2196+
#define SCE_SINEX_BLOCK_END 3
2197+
#define SCE_SINEX_DATE 4
2198+
#define SCE_SINEX_NUMBER 5
2199+
#define SCE_SINEX_STRING 6
21922200
/* --Autogenerated -- end of section automatically generated from LexicalStyles.iface */
21932201

21942202
#endif

lexers/LexSINEX.cxx

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
// Scintilla source code edit control
2+
// Encoding: UTF-8
3+
/** @file LexSINEX.cxx
4+
** Lexer for SINEX (Solution INdependent EXchange format) files
5+
** https://www.iers.org/SharedDocs/Publikationen/EN/IERS/Documents/ac/sinex/sinex_v202_pdf.pdf
6+
**
7+
** Written by Franck Reinquin
8+
**/
9+
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
10+
// The License.txt file describes the conditions under which this software may be distributed.
11+
12+
#include <stdlib.h>
13+
#include <string.h>
14+
#include <stdio.h>
15+
#include <stdarg.h>
16+
#include <assert.h>
17+
#include <ctype.h>
18+
19+
#include <string>
20+
21+
#include "ILexer.h"
22+
#include "Scintilla.h"
23+
#include "SciLexer.h"
24+
25+
#include "LexAccessor.h"
26+
#include "Accessor.h"
27+
#include "CharacterSet.h"
28+
#include "LexerModule.h"
29+
30+
using namespace Lexilla;
31+
32+
namespace {
33+
// Use an unnamed namespace to protect the functions and classes from name conflicts
34+
35+
36+
// States of the finite-state machine that parses a real number
// (see IsSINEXNumber). The values follow the order in which the parts of a
// number such as "-1.25E+03" are met.
37+
typedef enum {
38+
NOTHING=0, // parsing not started
39+
SIGN_1=1, // a sign (+/-) was encountered for the first time (sign of the mantissa)
40+
MANTISSA_1=2, // one or more consecutive digits were encountered before any dot
41+
DOT=3, // the decimal point was encountered
42+
MANTISSA_2=4, // one or more consecutive digits were encountered after a dot
43+
D_OR_E=5, // the scientific notation letter (e,E,d,D) was encountered
44+
SIGN_2=6, // a sign (+/-) was encountered for the second time (sign of the exponent)
45+
EXPONENT=7 // one or more consecutive digits were encountered after the exponent letter
46+
} E_REAL_PARSING_STATE ;
47+
48+
49+
// Check if end of line encountered. Possible terminators : '\n', '\r', '\r\n'
50+
// For '\r\n' terminators, the EOL is reached at the '\n' character.
51+
inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
52+
return (styler[i] == '\n') ||
53+
((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
54+
}
55+
56+
57+
// Tell whether a line is a comment line, judging by the style already
// assigned to the first character of that line.
inline bool IsCommentLine(Accessor &styler, Sci_Position line) {
	const Sci_Position lineStart = styler.LineStart(line);
	return SCE_SINEX_COMMENTLINE == styler.StyleAt(lineStart);
}
61+
62+
63+
// Check whether the string is number, either integer or float ; the
64+
// scientific representation is also accepted.
65+
// Implemented as a finite-state machine
66+
// Mostly equivalent to REGEX : ^[+-]?\d*\.?\d+([eE][+-]?\d+)?$
67+
inline bool IsSINEXNumber(const char *text, Sci_PositionU len) {
68+
69+
E_REAL_PARSING_STATE parsingState = NOTHING ;
70+
bool firstDigitsFound = false;
71+
72+
if (len == 0)
73+
return false;
74+
75+
for (Sci_PositionU i = 0 ; i < len ; i++) {
76+
if ((text[i] == '-') || (text[i] == '+')) {
77+
// valid only at the beginning and after an exponent letter
78+
if (parsingState == NOTHING) {
79+
parsingState = SIGN_1 ;
80+
} else if (parsingState == D_OR_E) {
81+
parsingState = SIGN_2;
82+
} else {
83+
return false;
84+
}
85+
86+
} else if (text[i] == '.') {
87+
// valid only after the first digits, which can be absent (e.g. '-.12')
88+
if ((parsingState == NOTHING) || (parsingState == SIGN_1) \
89+
|| (parsingState == MANTISSA_1)) {
90+
parsingState = DOT;
91+
} else {
92+
return false;
93+
}
94+
95+
} else if ((text[i] == 'e') || (text[i] == 'E') || (text[i] == 'd') \
96+
|| (text[i] == 'D')) {
97+
// valid only after the first digits ('.e+7' is NOK)
98+
if (! firstDigitsFound) return false;
99+
if ((parsingState == MANTISSA_1) || (parsingState == MANTISSA_2) \
100+
|| (parsingState == DOT)) {
101+
parsingState = D_OR_E;
102+
} else {
103+
return false;
104+
}
105+
106+
} else if (isdigit(text[i])) {
107+
if ((parsingState == NOTHING) || (parsingState == SIGN_1) \
108+
|| (parsingState == MANTISSA_1)) {
109+
parsingState = MANTISSA_1;
110+
firstDigitsFound = true;
111+
} else if ((parsingState == DOT) || (parsingState == MANTISSA_2)) {
112+
parsingState = MANTISSA_2;
113+
firstDigitsFound = true;
114+
} else if ((parsingState == D_OR_E) || (parsingState == SIGN_2) \
115+
|| (parsingState == EXPONENT)) {
116+
parsingState = EXPONENT;
117+
} else {
118+
return false;
119+
}
120+
121+
} else {
122+
// other characters are not valid
123+
return false ;
124+
}
125+
}
126+
return (firstDigitsFound && (parsingState != D_OR_E) && (parsingState != SIGN_2));
127+
}
128+
129+
130+
// Check whether the string is a SINEX date (YY:DDD:SSSSS)
131+
// For the record : YY = 2-digit year (!), DDD = Day Of Year, SSSSS = seconds
132+
// in the day
133+
inline bool IsSINEXDate(const char *text, Sci_PositionU len) {
134+
135+
if (len < 11) return false;
136+
return (IsADigit(text[0]) && IsADigit(text[1]) && text[2] == ':' &&
137+
IsADigit(text[3]) && IsADigit(text[4]) && IsADigit(text[5]) && text[6] == ':' &&
138+
IsADigit(text[7]) && IsADigit(text[8]) && IsADigit(text[9]) &&
139+
IsADigit(text[10]) && IsADigit(text[11]));
140+
}
141+
142+
// Find next space in the string : return an offset in the string >= start
143+
// or len if not found
144+
static Sci_PositionU FindNextSpace(const char *text,
145+
Sci_PositionU start, Sci_PositionU len) {
146+
Sci_PositionU pos ;
147+
for (pos = start ; pos < len ; pos ++ ) {
148+
if (IsASpace(text[pos]))
149+
return pos;
150+
}
151+
return pos;
152+
}
153+
154+
// Colourization logic for one line
155+
void ColouriseSinexLine(
156+
const char *lineBuffer,
157+
Sci_PositionU lengthLine,
158+
Sci_PositionU startLine,
159+
Sci_PositionU endPos,
160+
Accessor &styler) {
161+
162+
if (lengthLine <= 0)
163+
return;
164+
165+
// comment line
166+
if (lineBuffer[0] == '*') {
167+
styler.ColourTo(endPos, SCE_SINEX_COMMENTLINE);
168+
169+
// block start (+BLOCK_NAME)
170+
} else if (lineBuffer[0] == '+') {
171+
styler.ColourTo(endPos, SCE_SINEX_BLOCK_START);
172+
173+
// block end (-BLOCK_NAME)
174+
} else if (lineBuffer[0] == '-') {
175+
styler.ColourTo(endPos, SCE_SINEX_BLOCK_END);
176+
177+
// Other lines : parse content
178+
} else {
179+
Sci_PositionU i = 0 ;
180+
Sci_PositionU nextSpace ;
181+
182+
// process word by word
183+
while ((nextSpace = FindNextSpace(lineBuffer,i,lengthLine)) < lengthLine) {
184+
// Detect dates YY:DDD:SSSSS (first test aims at speeding up detection)
185+
if (IsADigit(lineBuffer[i]) && IsSINEXDate(&lineBuffer[i], nextSpace-i)) {
186+
styler.ColourTo(startLine+nextSpace-1, SCE_SINEX_DATE);
187+
// Numbers (integers or floats, including scientific notation)
188+
} else if (IsSINEXNumber(&lineBuffer[i], nextSpace-i)) {
189+
styler.ColourTo(startLine+nextSpace-1, SCE_SINEX_NUMBER);
190+
}
191+
// consume all spaces
192+
for (i=nextSpace ; (i < lengthLine) && IsASpace(lineBuffer[i]) ; i++)
193+
;
194+
styler.ColourTo(startLine+i-1, SCE_SINEX_DEFAULT);
195+
}
196+
styler.ColourTo(endPos, SCE_SINEX_DEFAULT);
197+
}
198+
}
199+
200+
201+
// Colourization logic for a whole area
202+
// The area is split into lines which are separately processed
203+
void ColouriseSinexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) {
204+
// initStyle not needed as each line is independent
205+
std::string lineBuffer;
206+
styler.StartAt(startPos);
207+
styler.StartSegment(startPos);
208+
Sci_PositionU startLine = startPos;
209+
210+
for (Sci_PositionU i = startPos; i < startPos + length; i++) {
211+
lineBuffer.push_back(styler[i]);
212+
if (AtEOL(styler, i)) {
213+
// End of line (or of line buffer) met, colourise it
214+
ColouriseSinexLine(lineBuffer.c_str(), lineBuffer.length(), startLine, i, styler);
215+
lineBuffer.clear();
216+
startLine = i + 1;
217+
}
218+
}
219+
if (!lineBuffer.empty()) { // Last line does not have ending characters
220+
ColouriseSinexLine(lineBuffer.c_str(), lineBuffer.length(), startLine, startPos + length - 1, styler);
221+
}
222+
}
223+
224+
225+
// Folding logic.
// Sets the fold level of every line whose end-of-line character lies in the
// processed range. The decision for a line is based on the style found on its
// end-of-line character :
//  * SCE_SINEX_BLOCK_START raises the level of the following lines,
//  * SCE_SINEX_BLOCK_END lowers the level of the following lines,
//  * if the "fold.comment" property is set, a run of two or more comment
//    lines is folded under its first line.
// The initial style and the keyword lists (unnamed parameters) are not used.
226+
void FoldSinexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) {
227+
228+
// Nothing is done when the range starts on an end-of-line character
if (AtEOL(styler, startPos))
229+
return ;
230+
231+
bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
232+
Sci_PositionU endPos = startPos + length;
233+
// Style of the line being closed ; assigned at each end of line before use
int styleCurrent;
234+
235+
// One-line comments are not folded, multi-line comments may be folded
236+
// (see "fold.comment" property) : we need to check the lines before the
237+
// first line.
238+
// Possible cases :
239+
// * start of a comment block : first line = comment, 1 comment line before
240+
// * continuation of a comment block : first line = comment , >1 comment
241+
// lines before
242+
// * end of a comment block : first line != comment, >1 comment lines before
243+
Sci_Position lineCurrent = styler.GetLine(startPos);
244+
int nbCommentLines = 0 ;
245+
// Walk backwards as long as the previous line is a comment line ;
// lineCurrent ends up on the first line of the comment run (or is unchanged)
while (lineCurrent > 0) {
246+
if (!IsCommentLine(styler, lineCurrent-1))
247+
break ;
248+
nbCommentLines++;
249+
lineCurrent--;
250+
}
251+
252+
// Go back to the start of the comment block, if any. Level at that line
253+
// is known
254+
Sci_Position newStartPos = (nbCommentLines == 0) ? startPos : styler.LineStart(lineCurrent);
255+
256+
// Now go through the provided text
257+
// levelCurrent : level of the line being closed
// levelPrev    : level given to the previous line
// levelNext    : level the following line will start with
// indComment   : number of consecutive comment lines met so far
int levelCurrent = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
258+
int levelPrev = levelCurrent;
259+
int levelNext = levelCurrent;
260+
int indComment = 0;
261+
262+
for (Sci_PositionU i = newStartPos; i < endPos; i++) {
263+
// All the work is done once per line, on its end-of-line character
if (AtEOL(styler, i)) {
264+
levelCurrent = levelNext ;
265+
styleCurrent = styler.StyleAt(i);
266+
if (foldComment) {
267+
if (styleCurrent == SCE_SINEX_COMMENTLINE) {
268+
indComment++ ;
269+
if (indComment==2) {
270+
// second comment line ->increase level
271+
// (do nothing for single comment lines)
272+
levelCurrent++;
273+
}
274+
} else {
275+
// not a comment line : decrease level if it follows a
276+
// multi-line comment
277+
if (indComment >= 2) {
278+
levelCurrent--;
279+
}
280+
indComment = 0;
281+
}
282+
// the comment adjustment also applies to the following line
levelNext = levelCurrent ;
283+
}
284+
// Block delimiters change the level of the following lines only
switch (styleCurrent) {
285+
case SCE_SINEX_BLOCK_START:
286+
levelNext++;
287+
break;
288+
case SCE_SINEX_BLOCK_END:
289+
levelNext--;
290+
break;
291+
}
292+
styler.SetLevel(lineCurrent, levelCurrent);
293+
294+
// now update previous line state (if header) : a line followed by a
// deeper line becomes a fold header
295+
if (levelCurrent > levelPrev) {
296+
int lev = levelPrev;
297+
lev |= SC_FOLDLEVELHEADERFLAG;
298+
//lev |= 1<<16;
299+
if (lev != styler.LevelAt(lineCurrent-1)) {
300+
styler.SetLevel(lineCurrent-1, lev);
301+
}
302+
}
303+
lineCurrent++;
304+
levelPrev = levelCurrent ;
305+
}
306+
307+
}
308+
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
309+
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
310+
styler.SetLevel(lineCurrent, levelNext | flagsNext);
311+
}
312+
313+
// Descriptions of the keyword sets of the lexer ; the list ends with a null
// pointer.
const char * const sinexWordListDesc[] = {
	"SNX",
	nullptr
};
317+
318+
} // unnamed namespace end
319+
320+
// Lexer module for SINEX files : ties the SCLEX_SINEX identifier and the
// "sinex" name to the colourising function, the folding function and the
// keyword set descriptions defined above.
extern const LexerModule lmSINEX(SCLEX_SINEX, ColouriseSinexDoc, "sinex", FoldSinexDoc, sinexWordListDesc);

src/Lexilla.cxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ extern const LexerModule lmRuby;
137137
extern const LexerModule lmRust;
138138
extern const LexerModule lmSAS;
139139
extern const LexerModule lmScriptol;
140+
extern const LexerModule lmSINEX;
140141
extern const LexerModule lmSmalltalk;
141142
extern const LexerModule lmSML;
142143
extern const LexerModule lmSorc;
@@ -290,6 +291,7 @@ void AddEachLexer() {
290291
&lmRust,
291292
&lmSAS,
292293
&lmScriptol,
294+
&lmSINEX,
293295
&lmSmalltalk,
294296
&lmSML,
295297
&lmSorc,

0 commit comments

Comments
 (0)