TkN 2.2
Toolkit for Nuclei
tkstring.cpp
1/********************************************************************************
2 * Copyright (c) : Université de Lyon 1, CNRS/IN2P3, UMR5822, *
3 * IP2I, F-69622 Villeurbanne Cedex, France *
4 * Normandie Université, ENSICAEN, UNICAEN, CNRS/IN2P3, *
5 * LPC Caen, F-14000 Caen, France *
6 * Contributor(s) : *
7 * Jérémie Dudouet jeremie.dudouet@cnrs.fr [2020] *
8 * Diego Gruyer diego.gruyer@cnrs.fr [2020] *
9 * *
10 * This software is governed by the CeCILL-B license under French law and *
11 * abiding by the rules of distribution of free software. You can use, *
12 * modify and/ or redistribute the software under the terms of the *
13 * CeCILL-B license as circulated by CEA, CNRS and INRIA at the following *
14 * URL \"http://www.cecill.info\". *
15 * *
16 * As a counterpart to the access to the source code and rights to copy, *
17 * modify and redistribute granted by the license, users are provided *
18 * only with a limited warranty and the software's author, the holder of *
19 * the economic rights, and the successive licensors have only limited *
20 * liability. *
21 * *
22 * In this respect, the user's attention is drawn to the risks associated *
23 * with loading, using, modifying and/or developing or reproducing the *
24 * software by the user in light of its specific status of free software, *
25 * that may mean that it is complicated to manipulate, and that also *
26 * therefore means that it is reserved for developers and experienced *
27 * professionals having in-depth computer knowledge. Users are therefore *
28 * encouraged to load and test the software's suitability as regards *
29 * their requirements in conditions enabling the security of their *
30 * systems and/or data to be ensured and, more generally, to use and *
31 * operate it in the same conditions as regards security. *
32 * *
33 * The fact that you are presently reading this means that you have had *
34 * knowledge of the CeCILL-B license and that you accept its terms. *
35 ********************************************************************************/
36#include "tkstring.h"
37
38#include <list>
39#include <cstdarg>
40#include <iostream>
41#include <cmath>
42#include <algorithm>
43#include <iostream>
44
45namespace tkn {
55}
56
57using namespace tkn;
58
59tkstring& tkstring::to_lower()
60{
61 std::transform(begin(), end(), begin(),[](unsigned char _c){ return std::tolower(_c); });
62 return *this;
63}
64
66{
67 std::transform(begin(), end(), begin(),[](unsigned char _c){ return std::toupper(_c); });
68 return *this;
69}
70
82{
83 const char* cp = data();
84 size_t len = length();
85 if (len == 0) return false;
86 int b = 0, d = 0;
87 for (size_t i = 0; i < len; ++i) {
88 if (cp[i] != ' ' && !isdigit(cp[i])) return false;
89 if (cp[i] == ' ') b++;
90 if (isdigit(cp[i])) d++;
91 }
92 return !(b && !d);
93}
94
{
    // Returns true when the string looks like an integer or a floating point number.
    // A plain integer (digits and blanks only) is accepted straight away by is_digit().
    if (is_digit()) return true;

    tkstring tmp = *this;
    // Blank out the FIRST occurrence of each of '.', ',', 'e' (or 'E' when there is no 'e'),
    // '+' and '-'. If the string was a floating point number, only digits and blanks
    // remain and is_digit() accepts it.
    // index() returns size_t; storing npos in an int is relied upon to yield -1.
    // NOTE(review): because only one occurrence of each symbol is replaced, a string with
    // two minus signs such as "-1e-5" still contains a '-' afterwards and is rejected.
    int i_dot, i_e, i_plus, i_minus, i_comma;

    i_dot = tmp.index(".");
    if (i_dot > -1) tmp.replace(i_dot, 1, " ", 1);
    i_comma = tmp.index(",");
    if (i_comma > -1) tmp.replace(i_comma, 1, " ", 1);
    i_e = tmp.index("e");
    if (i_e > -1)
        tmp.replace(i_e, 1, " ", 1);
    else {
        // no lower-case 'e': try a capital 'E'
        i_e = tmp.index("E");
        if (i_e > -1) tmp.replace(i_e, 1, " ", 1);
    }
    i_plus = tmp.index("+");
    if (i_plus > -1) tmp.replace(i_plus, 1, " ", 1);
    i_minus = tmp.index("-");
    if (i_minus > -1) tmp.replace(i_minus, 1, " ", 1);

    // what is left must now be made of digits and blanks only
    return tmp.is_digit();
}
140
141//tkstring tkstring::energy_to_string(double _val, int _precision)
142//{
143// int exp_value = (_val == 0) ? 0 : 1 + (int)std::floor(std::log10(std::fabs(_val) ) );
144// int exp_error = (_precision == 0) ? 0 : 1 + (int)std::floor(std::log10(std::fabs(_precision) ) );
145
146// std::ostringstream os;
147
148// if(_precision<0) os.precision(exp_value+exp_error);
149// else os.precision(exp_value);
150// os << _val;
151
152// tkstring result = os.str();
153// return result;
154//}
155
156//tkstring tkstring::energy_error_to_string(double _val, int _precision)
157//{
158// int exp_error = (_precision == 0) ? 0 : 1 + (int)std::floor(std::log10(std::fabs(_precision) ) );
159// if(_precision>0) exp_error = 1 + (int)std::floor(std::log10(std::fabs(_val) ) );
160// std::ostringstream os;
161
162// os.precision(exp_error);
163// os << _val;
164
165// tkstring result = os.str();
166// return result;
167//}
168
169
175int tkstring::atoi() const
176{
177 int end = index(" ");
178 //if no whitespaces in string, just use atoi()
179 if (end == -1)
180 return std::atoi(data());
181
182 tkstring tmp = *this;
183 tmp.erase(end,1);
184 return std::atoi(tmp.data());
185}
186
191double tkstring::atof() const
192{
193 //look for a comma and some whitespace
194 int comma = index(",");
195 int end = index(" ");
196 //if no commas & no whitespace in string, just use atof()
197 if (comma == -1 && end == -1)
198 return std::atof(data());
199 tkstring tmp = *this;
200 if (comma > -1) {
201 //replace comma with full stop
202 tmp.replace(comma, 1, ".");
203 }
204 //no whitespace ?
205 if (end == -1)
206 return std::atof(tmp.data());
207 //remove whitespace
208 tmp.erase(end,1);
209
210 return std::atof(tmp.data());
211}
212
218size_t tkstring::index(const char *_s, size_t _pos, ECaseCompare _cmp) const
219{
220 if(_cmp == ECaseCompare::kExact) return find(_s,_pos);
221
222 tkstring copy = *this;
223 copy.to_lower();
224 tkstring test(_s);
225 test.to_lower();
226 return copy.find(test,_pos);
227}
228
234bool tkstring::equal_to(const char *_s, ECaseCompare _cmp) const
235{
236 if (_cmp == kExact)
237 return strcmp(_s, data()) == 0;
238 return strcasecmp(_s, data()) == 0;
239}
240
241bool tkstring::ends_with(const char *_s, ECaseCompare _cmp) const
242{
243 if (!_s) return true;
244
245 size_t l = strlen(_s);
246 if (l > length()) return false;
247 const char *s2 = data() + length() - l;
248
249 if (_cmp == kExact)
250 return strcmp(_s, s2) == 0;
251 return strcasecmp(_s, s2) == 0;
252}
253
254std::vector<tkstring> tkstring::tokenize(const tkstring &_delim) const
255{
256 std::vector<tkstring> tokens;
257 std::list<int> splitIndex;
258
259 size_t i, start, nrDiff = 0;
260
261 for (i = 0; i < _delim.length(); i++) {
262 start = 0;
263 while (start < length()) {
264 size_t pos = find(_delim.at(i), start);
265 if (pos == npos) break;
266 splitIndex.push_back(pos);
267 start = pos + 1;
268 }
269 if (start > 0) nrDiff++;
270 }
271 splitIndex.push_back(length());
272
273 if (nrDiff > 1)
274 splitIndex.sort();
275
276 start = -1;
277 std::list<int>::const_iterator it;
278#ifndef R__HPUX
279 for (it = splitIndex.begin(); it != splitIndex.end(); ++it) {
280#else
281 for (it = splitIndex.begin(); it != (std::list<int>::const_iterator) splitIndex.end(); ++it) {
282#endif
283 size_t stop = *it;
284 if (stop - 1 >= start + 1) {
285 tkstring tok(substr(start+1, stop-start-1));
286 if(tok.length()) tokens.push_back(tok);
287 }
288 start = stop;
289 }
290
291 return tokens;
292}
293
294std::vector<tkstring> tkstring::tokenize_from_string(const tkstring &_delim) const {
295 std::vector<tkstring> tokens;
296 size_t start = 0, pos = 0;
297
298 // Boucle tant qu'on trouve le délimiteur complet
299 while ((pos = find(_delim, start)) != npos) {
300 tkstring token = substr(start, pos - start);
301 if (token.length()) {
302 tokens.push_back(token);
303 }
304 // On avance de la longueur du délimiteur
305 start = pos + _delim.length();
306 }
307
308 // Ajoute le dernier segment (après le dernier délimiteur)
309 tkstring token = substr(start);
310 if (token.length()) {
311 tokens.push_back(token);
312 }
313
314 return tokens;
315}
316
317tkstring& tkstring::replace_all(const char *_s1, size_t _ls1, const char *_s2, size_t _ls2)
318{
319 if (_s1 && _ls1 > 0) {
320 size_t index = 0;
321 while ((index = find(_s1,index,_ls1)) != npos) {
322 replace(index, _ls1, _s2, _ls2);
323 index += _ls2;
324 }
325 }
326 return *this;
327}
328
330{
331 std::size_t found = find_last_of(_s1);
332 tkstring name = substr(found+1);
333
334 return name;
335}
336
338{
339 std::size_t found = find_last_of(_s1);
340 tkstring name = substr(0,found);
341
342 return name;
343}
344
345tkstring tkstring::Form(const char * _format, ...)
346{
347 static char *buffer;
348 static size_t buffer_size;
349
350 va_list argptr;
351 va_start(argptr, _format);
352 size_t length = vsnprintf(buffer, buffer_size, _format, argptr);
353 va_end(argptr);
354
355 if (length + 1 > buffer_size) {
356 buffer_size = length + 1;
357 char *tmp = static_cast<char*>(realloc(buffer, buffer_size));
358 if(tmp) buffer = tmp;
359
360 va_start(argptr, _format);
361 vsnprintf(buffer, buffer_size, _format, argptr);
362 va_end(argptr);
363 }
364
365 tkstring result(buffer);
366
367 return result;
368}
369
371{
372 tkstring temp(*this);
373 return temp;
374}
375
377{
378 for(size_t i=0 ; i<length() ; i++) {
379 if ((*this)[i] >= 'a' && (*this)[i] <= 'z') {
380 (*this)[i] -= ('a' - 'A');
381 return *this;
382 }
383 }
384 return *this;
385}
386
388{
389 tkstring result{};
390
391 const char *cp = data();
392 size_t len = length();
393
394 for (size_t i = 0; i < len; ++i)
395 if (isalpha(cp[i]))
396 result += cp[i];
397 return result;
398}
399
400
402{
403 tkstring result{};
404
405 const char *cp = data();
406 size_t len = length();
407
408 for (size_t i = 0; i < len; ++i)
409 if (!isalpha(cp[i]))
410 result += cp[i];
411 return result;
412}
413
421{
422 const char *cp = data();
423 size_t len = length();
424 if (len == 0) return false;
425 for (size_t i = 0; i < len; ++i)
426 if (!isalpha(cp[i]))
427 return false;
428 return true;
429}
430
431const char* tkstring::form(const char * _format, ...)
432{
433 static char *buffer;
434 static size_t buffer_size;
435
436 va_list argptr;
437 va_start(argptr, _format);
438 size_t length = vsnprintf(buffer, buffer_size, _format, argptr);
439 va_end(argptr);
440
441 if (length + 1 > buffer_size) {
442 buffer_size = length + 1;
443 char *tmp = static_cast<char*>(realloc(buffer, buffer_size));
444 if(tmp) buffer = tmp;
445
446 va_start(argptr, _format);
447 vsnprintf(buffer, buffer_size, _format, argptr);
448 va_end(argptr);
449 }
450
451 return buffer;
452}
453
/// Builds the string "value(error).10$^{exponent}$" from a value and its error.
///
/// The value is written with its mantissa, the error in parentheses is expressed
/// in units of the value's power of ten, and the power of ten is written with
/// the "$^{..}$" markup. The ".10$^{0}$" suffix is dropped when the exponent is 0.
/// @param _value central value
/// @param _error error on the value
tkstring::tkstring(double _value, double _error): std::string("")
{
    double y = _value;
    double ey = _error;

    // Scientific notation of both numbers: mantissa and exponent separated by 'e'.
    tkstring sy = Form("%1.2e", y);
    tkstring sey = Form("%1.1e", ey);

    tkstring sy_dec, sy_exp, sey_dec, sey_exp;
    double y_dec, ey_dec;
    int y_exp, ey_exp;

    // Extract mantissa and exponent of the value
    std::vector<tkstring> loa_y = sy.tokenize("e");
    sy_dec = loa_y.front();
    sy_exp = loa_y.back();

    y_dec = sy_dec.atof();
    y_exp = sy_exp.atoi();

    // Extract mantissa and exponent of the error
    std::vector<tkstring> loa_ey = sey.tokenize("e");

    sey_dec = loa_ey.front();
    sey_exp = loa_ey.back();

    ey_dec = sey_dec.atof();
    ey_exp = sey_exp.atoi();

    // Error rescaled to the power of ten of the value
    double err = ey_dec * pow(10., ey_exp - y_exp);
    tkstring s;

    // The cascade below picks a format depending on whether "%g" prints the mantissa
    // and the error with a decimal point, and appends ".0" or "0" where it does not.
    // In every branch the error mantissa is used as is when both exponents are equal,
    // and the rescaled error otherwise.
    if (!Form("%1.2g", y_dec).contains(".") && err >= 1) {
        // value mantissa printed without decimal point, error not below 1

        if (!Form("%1.2g", err).contains(".")) {
            if (y_exp == ey_exp) s = Form("%1.2g.0(%g.0).10$^{%d}$", y_dec, ey_dec, y_exp);
            else s = Form("%1.3g.0(%g.0).10$^{%d}$", y_dec, err, y_exp);
        } else if (Form("%1.2g", err) == Form("%1.1g", err) && Form("%1.2g", err).contains(".")) {
            if (y_exp == ey_exp) s = Form("%1.2g.0(%g0).10$^{%d}$", y_dec, ey_dec, y_exp);
            else s = Form("%1.3g.0(%g0).10$^{%d}$", y_dec, err, y_exp);
        } else {
            if (y_exp == ey_exp) s = Form("%1.2g.0(%g).10$^{%d}$", y_dec, ey_dec, y_exp);
            else s = Form("%1.3g.0(%g).10$^{%d}$", y_dec, err, y_exp);
        }
    } else if (Form("%1.3g", y_dec) == Form("%1.2g", y_dec) && Form("%1.2g", y_dec).contains(".") && err < 1) {
        // value mantissa printed with a single decimal, error below 1: pad the value with a '0'
        if (!Form("%1.2g", err).contains(".")) {
            if (y_exp == ey_exp) s = Form("%1.2g0(%g.0).10$^{%d}$", y_dec, ey_dec, y_exp);
            else s = Form("%1.3g0(%g.0).10$^{%d}$", y_dec, err, y_exp);
        } else if (Form("%1.2g", err) == Form("%1.1g", err) && Form("%1.2g", err).contains(".")) {
            if (y_exp == ey_exp) s = Form("%1.2g0(%g0).10$^{%d}$", y_dec, ey_dec, y_exp);
            else s = Form("%1.3g0(%g0).10$^{%d}$", y_dec, err, y_exp);
        } else {
            if (y_exp == ey_exp) s = Form("%1.2g0(%g).10$^{%d}$", y_dec, ey_dec, y_exp);
            else s = Form("%1.3g0(%g).10$^{%d}$", y_dec, err, y_exp);
        }
    } else if (!Form("%1.2g", err).contains(".")) {
        // remaining cases: the value is printed as is, only the error may be padded
        if (y_exp == ey_exp) s = Form("%1.2g(%g.0).10$^{%d}$", y_dec, ey_dec, y_exp);
        else s = Form("%1.3g(%g.0).10$^{%d}$", y_dec, err, y_exp);
    } else if (Form("%1.2g", err) == Form("%1.1g", err) && Form("%1.2g", err).contains(".")) {
        if (y_exp == ey_exp) s = Form("%1.2g(%g0).10$^{%d}$", y_dec, ey_dec, y_exp);
        else s = Form("%1.3g(%g0).10$^{%d}$", y_dec, err, y_exp);
    } else {
        if (y_exp == ey_exp) s = Form("%1.2g(%g).10$^{%d}$", y_dec, ey_dec, y_exp);
        else s = Form("%1.3g(%g).10$^{%d}$", y_dec, err, y_exp);;
    }

    // Drop the power of ten when the exponent is 0, then remove a '0' standing
    // right before the closing parenthesis.
    s.replace_all(".10$^{0}$", "");
    s.replace_all("0)", ")");

    *this = s;
}
525
527{
528 // Remove any superfluous whitespace (or tabs or newlines) from string (modify string)
529 // i.e. transform " Mary Had\tA Little \n Laaaaaaaaaaaaaaaaaamb"
530 // into "Mary Had A Little Lamb"
531
532 tkstring tmp = *this;
533 tkstring tmp2;
534
535 for(const auto &s: tmp.tokenize(" \n\t")) {
536 if (tmp2.length()) tmp2 += " ";
537 tmp2 += s;
538 }
539
540 if(tmp2.begins_with(' ')) tmp2.erase(1);
541
542 *this = tmp2;
543
544 return *this;
545}
546
547int tkstring::count_string(const tkstring &_st) const
548{
549 int count = 0;
550 size_t index=0;
551
552 while ((index = find(_st,index)) != npos) {
553 index += _st.length();
554 count++;
555 }
556
557 return count;
558}
559
561{
562 // Remove any superfluous whitespace (or tabs or newlines) from string (does not modify string)
563 // i.e. transform " Mary Had\tA Little \n Laaaaaaaaaaaaaaaaaamb"
564 // into "Mary Had A Little Lamb"
565
566 tkstring tmp = *this;
567 tkstring tmp2;
568
569 for(const auto &s: tmp.tokenize(" \n\t")) {
570 if (tmp2.length()) tmp2 += " ";
571 tmp2 += s;
572 }
573
574 if(tmp2.begins_with(' ')) tmp2.erase(1);
575
576 return tmp2;
577}
578
579bool tkstring::match(const char *_pattern) const
580{
581 // Check if pattern fit the considered string
582 // As in ls shell command the * symbol represents the non discriminant part
583 // of the pattern
584 // if no * is present in the pattern, the result correspond to TString::Contains method
585 // Example KVString st(file_R45.dat);
586 // st.Match("*") -> kTRUE
587 // st.Match("file") ->kTRUE
588 // st.Match("*file*R*") ->kTRUE
589 // etc ....
590
591 tkstring pat(_pattern);
592
593 if (!pat.contains("*")) return this->contains(pat);
594 if (pat == "*") return true;
595
596 std::vector<tkstring> tok = pat.tokenize("*");
597 int n_tok = tok.size();
598 if (!pat.begins_with("*"))
599 if (!begins_with(tok.front())) {
600 return false;
601 }
602 if (!pat.ends_with("*"))
603 if (!ends_with(tok.back())) {
604 return false;
605 }
606
607 int idx = 0, num = 0;
608 for (int ii = 0; ii < n_tok; ii += 1) {
609 idx = index(tok.at(ii), idx);
610 if (idx != -1) {
611 num += 1;
612 idx++;
613 } else break;
614 }
615 return (num == n_tok);
616}
623std::istream& tkstring::read_line(std::istream& _strm, bool _skip_white)
624{
625 if(_skip_white) getline(_strm >> std::ws, *this);
626 else getline(_strm, *this);
627
628 return _strm;
629}
630
632{
633 if(error.is_empty()) return -1.;
634 return error.atof()*tkstring::get_precision(val);
635}
636
{
    // Returns the weight of the last written digit of the number held in st,
    // as a power of ten: 10^(exponent - number of digits after the decimal point).
    double precision = 1.0; int expo; size_t l1,l2;

    // l1: position of the decimal point, l2: position of the exponent mark ('e' or 'E')
    l1 = st.index(".",0,tkstring::kIgnoreCase);
    l2 = st.index("e",0,tkstring::kIgnoreCase);

    if ( l1 == std::string::npos ) { // no point
        if ( l2 == std::string::npos ) // no exponent
            precision = 1.0;
        else { // exponent
            // keep only what follows the exponent mark and read it as an integer
            st.erase(0,l2+1);
            expo = st.atoi();
            precision = pow(10.,expo);
        }
    } else { // one point
        if ( l2 == std::string::npos ) { // no exponent
            // number of characters after the point; the unsigned negation wraps and
            // is relied upon to give the expected negative value once stored in an int
            expo = - (st.size() - l1 - 1);
            precision = pow(10.0,expo);
        }
        else { // exponent
            st.erase(0,l2+1);
            expo = st.atoi();
            // l1 and l2 were taken before the erase: characters between point and exponent mark
            expo = - (l2 - l1 - 1) + expo;
            precision = pow(10.0,expo);
        }
    }
    // NOTE(review): pow(10., expo) should never be negative, so this branch
    // is presumably never taken - confirm.
    if ( precision < 0 ) precision = -1.0 * precision;

    return precision;
}
680
681#ifdef HAS_ROOT
682ClassImp(tkstring);
683#endif
std::string with useful tricks from TString (ROOT) and KVString (KaliVeda) and more....
Definition: tkstring.h:54
tkstring extract_alpha()
Returns a tkstring composed only of the alphabetic letters of the original tkstring.
Definition: tkstring.cpp:387
tkstring strip_all_extra_white_space() const
Definition: tkstring.cpp:560
tkstring copy() const
Returns a copy of this string.
Definition: tkstring.cpp:370
tkstring & to_lower()
Change all letters to lower case.
Definition: tkstring.cpp:59
static const char * form(const char *_format,...)
Definition: tkstring.cpp:431
bool is_float() const
Checks if string contains a floating point or integer number.
Definition: tkstring.cpp:110
tkstring get_last_occurence(const char *_s1)
Definition: tkstring.cpp:329
std::vector< tkstring > tokenize(const tkstring &_delim=" ") const
Create a vector of string separated by at least one delimiter.
Definition: tkstring.cpp:254
static tkstring Form(const char *_format,...)
Definition: tkstring.cpp:345
tkstring substr(size_type __pos=0, size_type __n=npos) const
Inlines.
Definition: tkstring.h:179
std::vector< tkstring > tokenize_from_string(const tkstring &_delim) const
Create a vector of string separated by a full string as delimiter.
Definition: tkstring.cpp:294
bool match(const char *_pattern) const
Definition: tkstring.cpp:579
bool is_alpha() const
Checks whether tkstring is only composed of alphabetic letters.
Definition: tkstring.cpp:420
std::istream & read_line(std::istream &_strm, bool _skip_white=true)
tkstring::read_line
Definition: tkstring.cpp:623
int atoi() const
Converts a string to integer value.
Definition: tkstring.cpp:175
bool ends_with(const char *_s, ECaseCompare _cmp=kExact) const
Definition: tkstring.cpp:241
static double get_absolute_error(tkstring val, tkstring error)
Get absolute uncertainty from value and error strings (1.27 4 -> 0.04), returns -1 in case of empty e...
Definition: tkstring.cpp:631
bool equal_to(const char *_s, ECaseCompare _cmp=kExact) const
Returns true if the string and _s are identical.
Definition: tkstring.cpp:234
size_t index(const char *_s, size_t _pos=0, ECaseCompare _cmp=kExact) const
Returns the index of the substring _s.
Definition: tkstring.cpp:218
tkstring remove_alpha()
Returns a tkstring composed only of the non alphabetic letters of the original tkstring.
Definition: tkstring.cpp:401
tkstring & remove_all_extra_white_space()
Definition: tkstring.cpp:526
bool contains(const char *_pat, ECaseCompare _cmp=kExact) const
Definition: tkstring.h:197
int count_string(const tkstring &_st) const
Definition: tkstring.cpp:547
bool begins_with(const char *_s, ECaseCompare _cmp=kExact) const
Definition: tkstring.h:185
tkstring & capitalize()
Change first letter of string from lower to upper case.
Definition: tkstring.cpp:376
static double get_precision(tkstring _st)
Extract the precision for a given ENSDF data.
Definition: tkstring.cpp:649
tkstring & replace_all(const tkstring &_s1, const tkstring &_s2)
Definition: tkstring.h:203
bool is_digit() const
Checks if all characters in string are digits (0-9) or whitespaces.
Definition: tkstring.cpp:81
tkstring remove_last_occurence(const char *_s1)
Definition: tkstring.cpp:337
double atof() const
Converts a string to double value.
Definition: tkstring.cpp:191
tkstring & to_upper()
Change all letters to upper case.
Definition: tkstring.cpp:65
Definition: tklog.cpp:39
tklog & error(tklog &log)
Definition: tklog.h:367