TkN 2.5
Toolkit for Nuclei
Loading...
Searching...
No Matches
tkstring.cpp
1/********************************************************************************
2 * Copyright (c) : Université de Lyon 1, CNRS/IN2P3, UMR5822, *
3 * IP2I, F-69622 Villeurbanne Cedex, France *
4 * Normandie Université, ENSICAEN, UNICAEN, CNRS/IN2P3, *
5 * LPC Caen, F-14000 Caen, France *
6 * Contributor(s) : *
7 * Jérémie Dudouet jeremie.dudouet@cnrs.fr [2020] *
8 * Diego Gruyer diego.gruyer@cnrs.fr [2020] *
9 * *
10 * Licensed under the MIT License <http://opensource.org/licenses/MIT>. *
11 * SPDX-License-Identifier: MIT *
12 ********************************************************************************/
13#include "tkstring.h"
14
15#include <list>
16#include <cstdarg>
17#include <iostream>
18#include <cmath>
19#include <algorithm>
20#include <iostream>
21#include <string>
22
23namespace tkn {
33}
34
35using namespace tkn;
36
37std::string tkn::wrap_text(const tkstring &_text, size_t _first_content_col, size_t _continuation_col, size_t _max_line_width)
38{
39 const size_t min_content_width = 20;
40 const size_t first_avail = _first_content_col < _max_line_width ? _max_line_width - _first_content_col : min_content_width;
41 const size_t continuation_avail = _continuation_col < _max_line_width ? _max_line_width - _continuation_col : min_content_width;
42 const std::string continuation_indent(_continuation_col, ' ');
43
44 auto wrap_paragraph = [&](const std::string &_paragraph, size_t _first_avail) {
45 std::string result;
46 size_t pos = 0;
47 bool first_chunk = true;
48 while (pos < _paragraph.size()) {
49 if (!first_chunk) result += "\n" + continuation_indent;
50 const size_t content_avail = first_chunk ? _first_avail : continuation_avail;
51 if (_paragraph.size() - pos <= content_avail) {
52 result += _paragraph.substr(pos);
53 break;
54 }
55 size_t wrap_at = _paragraph.rfind(' ', pos + content_avail);
56 if (wrap_at == std::string::npos || wrap_at <= pos) {
57 result += _paragraph.substr(pos, content_avail);
58 pos += content_avail;
59 } else {
60 result += _paragraph.substr(pos, wrap_at - pos);
61 pos = wrap_at + 1;
62 while (pos < _paragraph.size() && _paragraph[pos] == ' ') pos++;
63 }
64 first_chunk = false;
65 }
66 return result;
67 };
68
69 auto paragraphs = _text.tokenize_from_string("\n");
70 std::string wrapped;
71 bool first_para = true;
72 for (auto &para : paragraphs) {
73 if (!first_para) wrapped += "\n" + continuation_indent;
74 wrapped += wrap_paragraph(para, first_para ? first_avail : continuation_avail);
75 first_para = false;
76 }
77 return wrapped;
78}
79
81{
82 std::transform(begin(), end(), begin(),[](unsigned char _c){ return std::tolower(_c); });
83 return *this;
84}
85
87{
88 std::transform(begin(), end(), begin(),[](unsigned char _c){ return std::toupper(_c); });
89 return *this;
90}
91
101
103{
104 const char* cp = data();
105 size_t len = length();
106 if (len == 0) return false;
107 int b = 0, d = 0;
108 for (size_t i = 0; i < len; ++i) {
109 if (cp[i] != ' ' && !isdigit(cp[i])) return false;
110 if (cp[i] == ' ') b++;
111 if (isdigit(cp[i])) d++;
112 }
113 return !(b && !d);
114}
115
130
132{
133 //we first check if we have an integer, in this case, IsDigit() will be true straight away
134 if (is_digit()) return true;
135
136 tkstring tmp = *this;
137 //now we look for occurrences of '.', ',', e', 'E', '+', '-' and replace each
138 //with ' '. if it is a floating point, IsDigit() will then return kTRUE
139 int i_dot, i_e, i_plus, i_minus, i_comma;
140
141 i_dot = tmp.index(".");
142 if (i_dot > -1) tmp.replace(i_dot, 1, " ", 1);
143 i_comma = tmp.index(",");
144 if (i_comma > -1) tmp.replace(i_comma, 1, " ", 1);
145 i_e = tmp.index("e");
146 if (i_e > -1)
147 tmp.replace(i_e, 1, " ", 1);
148 else {
149 //try for a capital "E"
150 i_e = tmp.index("E");
151 if (i_e > -1) tmp.replace(i_e, 1, " ", 1);
152 }
153 i_plus = tmp.index("+");
154 if (i_plus > -1) tmp.replace(i_plus, 1, " ", 1);
155 i_minus = tmp.index("-");
156 if (i_minus > -1) tmp.replace(i_minus, 1, " ", 1);
157
158 //test if it is now uniquely composed of numbers
159 return tmp.is_digit();
160}
161
162//tkstring tkstring::energy_to_string(double _val, int _precision)
163//{
164// int exp_value = (_val == 0) ? 0 : 1 + (int)std::floor(std::log10(std::fabs(_val) ) );
165// int exp_error = (_precision == 0) ? 0 : 1 + (int)std::floor(std::log10(std::fabs(_precision) ) );
166
167// std::ostringstream os;
168
169// if(_precision<0) os.precision(exp_value+exp_error);
170// else os.precision(exp_value);
171// os << _val;
172
173// tkstring result = os.str();
174// return result;
175//}
176
177//tkstring tkstring::energy_error_to_string(double _val, int _precision)
178//{
179// int exp_error = (_precision == 0) ? 0 : 1 + (int)std::floor(std::log10(std::fabs(_precision) ) );
180// if(_precision>0) exp_error = 1 + (int)std::floor(std::log10(std::fabs(_val) ) );
181// std::ostringstream os;
182
183// os.precision(exp_error);
184// os << _val;
185
186// tkstring result = os.str();
187// return result;
188//}
189
190
195
196int tkstring::atoi() const
197{
198 int end = index(" ");
199 //if no whitespaces in string, just use atoi()
200 if (end == -1)
201 return std::atoi(data());
202
203 tkstring tmp = *this;
204 tmp.erase(end,1);
205 return std::atoi(tmp.data());
206}
207
212double tkstring::atof() const
213{
214 //look for a comma and some whitespace
215 int comma = index(",");
216 int end = index(" ");
217 //if no commas & no whitespace in string, just use atof()
218 if (comma == -1 && end == -1)
219 return std::atof(data());
220 tkstring tmp = *this;
221 if (comma > -1) {
222 //replace comma with full stop
223 tmp.replace(comma, 1, ".");
224 }
225 //no whitespace ?
226 if (end == -1)
227 return std::atof(tmp.data());
228 //remove whitespace
229 tmp.erase(end,1);
230
231 return std::atof(tmp.data());
232}
233
239size_t tkstring::index(const char *_s, size_t _pos, ECaseCompare _cmp) const
240{
241 if(_cmp == ECaseCompare::kExact) return find(_s,_pos);
242
243 tkstring tmp_copy = *this;
244 tmp_copy.to_lower();
245 tkstring test(_s);
246 test.to_lower();
247 return tmp_copy.find(test,_pos);
248}
249
254
255bool tkstring::equal_to(const char *_s, ECaseCompare _cmp) const
256{
257 if (_cmp == kExact)
258 return strcmp(_s, data()) == 0;
259 return strcasecmp(_s, data()) == 0;
260}
261
262bool tkstring::ends_with(const char *_s, ECaseCompare _cmp) const
263{
264 if (!_s) return true;
265
266 size_t l = strlen(_s);
267 if (l > length()) return false;
268 const char *s2 = data() + length() - l;
269
270 if (_cmp == kExact)
271 return strcmp(_s, s2) == 0;
272 return strcasecmp(_s, s2) == 0;
273}
274
275std::vector<tkstring> tkstring::tokenize(const tkstring &_delim) const
276{
277 std::vector<tkstring> tokens;
278 std::list<int> splitIndex;
279
280 size_t i, start, nrDiff = 0;
281
282 for (i = 0; i < _delim.length(); i++) {
283 start = 0;
284 while (start < length()) {
285 size_t pos = find(_delim.at(i), start);
286 if (pos == npos) break;
287 splitIndex.push_back(pos);
288 start = pos + 1;
289 }
290 if (start > 0) nrDiff++;
291 }
292 splitIndex.push_back(length());
293
294 if (nrDiff > 1)
295 splitIndex.sort();
296
297 start = -1;
298 std::list<int>::const_iterator it;
299#ifndef R__HPUX
300 for (it = splitIndex.begin(); it != splitIndex.end(); ++it) {
301#else
302 for (it = splitIndex.begin(); it != (std::list<int>::const_iterator) splitIndex.end(); ++it) {
303#endif
304 size_t stop = *it;
305 if (stop - 1 >= start + 1) {
306 tkstring tok(substr(start+1, stop-start-1));
307 if(tok.length()) tokens.push_back(tok);
308 }
309 start = stop;
310 }
311
312 return tokens;
313}
314
315std::vector<tkstring> tkstring::tokenize_from_string(const tkstring &_delim) const {
316 std::vector<tkstring> tokens;
317 size_t start = 0, pos = 0;
318
319 // Boucle tant qu'on trouve le délimiteur complet
320 while ((pos = find(_delim, start)) != npos) {
321 tkstring token = substr(start, pos - start);
322 if (token.length()) {
323 tokens.push_back(token);
324 }
325 // On avance de la longueur du délimiteur
326 start = pos + _delim.length();
327 }
328
329 // Ajoute le dernier segment (après le dernier délimiteur)
330 tkstring token = substr(start);
331 if (token.length()) {
332 tokens.push_back(token);
333 }
334
335 return tokens;
336}
337
338tkstring& tkstring::replace_all(const char *_s1, size_t _ls1, const char *_s2, size_t _ls2)
339{
340 if (_s1 && _ls1 > 0) {
341 size_t pos = 0;
342 while ((pos = find(_s1,pos,_ls1)) != npos) {
343 replace(pos, _ls1, _s2, _ls2);
344 pos += _ls2;
345 }
346 }
347 return *this;
348}
349
351{
352 std::size_t found = find_last_of(_s1);
353 tkstring name = substr(found+1);
354
355 return name;
356}
357
359{
360 std::size_t found = find_last_of(_s1);
361 tkstring name = substr(0,found);
362
363 return name;
364}
365
366tkstring tkstring::Form(const char * _format, ...)
367{
368 static char *buffer;
369 static size_t buffer_size;
370
371 va_list argptr;
372 va_start(argptr, _format);
373 size_t length = vsnprintf(buffer, buffer_size, _format, argptr);
374 va_end(argptr);
375
376 if (length + 1 > buffer_size) {
377 buffer_size = length + 1;
378 char *tmp = static_cast<char*>(realloc(buffer, buffer_size));
379 if(tmp) buffer = tmp;
380
381 va_start(argptr, _format);
382 vsnprintf(buffer, buffer_size, _format, argptr);
383 va_end(argptr);
384 }
385
386 tkstring result(buffer);
387
388 return result;
389}
390
392{
393 tkstring temp(*this);
394 return temp;
395}
396
398{
399 for(size_t i=0 ; i<length() ; i++) {
400 if ((*this)[i] >= 'a' && (*this)[i] <= 'z') {
401 (*this)[i] -= ('a' - 'A');
402 return *this;
403 }
404 }
405 return *this;
406}
407
409{
410 tkstring result{};
411
412 const char *cp = data();
413 size_t len = length();
414
415 for (size_t i = 0; i < len; ++i)
416 if (isalpha(cp[i]))
417 result += cp[i];
418 return result;
419}
420
421
423{
424 tkstring result{};
425
426 const char *cp = data();
427 size_t len = length();
428
429 for (size_t i = 0; i < len; ++i)
430 if (!isalpha(cp[i]))
431 result += cp[i];
432 return result;
433}
434
440
442{
443 const char *cp = data();
444 size_t len = length();
445 if (len == 0) return false;
446 for (size_t i = 0; i < len; ++i)
447 if (!isalpha(cp[i]))
448 return false;
449 return true;
450}
451
452const char* tkstring::form(const char * _format, ...)
453{
454 static char *buffer;
455 static size_t buffer_size;
456
457 va_list argptr;
458 va_start(argptr, _format);
459 size_t length = vsnprintf(buffer, buffer_size, _format, argptr);
460 va_end(argptr);
461
462 if (length + 1 > buffer_size) {
463 buffer_size = length + 1;
464 char *tmp = static_cast<char*>(realloc(buffer, buffer_size));
465 if(tmp) buffer = tmp;
466
467 va_start(argptr, _format);
468 vsnprintf(buffer, buffer_size, _format, argptr);
469 va_end(argptr);
470 }
471
472 return buffer;
473}
474
475tkstring::tkstring(double _value, double _error): std::string("")
476{
477 double y = _value;
478 double ey = _error;
479
480 tkstring sy = Form("%1.2e", y);
481 tkstring sey = Form("%1.1e", ey);
482
483 tkstring sy_dec, sy_exp, sey_dec, sey_exp;
484 double y_dec, ey_dec;
485 int y_exp, ey_exp;
486
487 //Recup de la valeur y
488 std::vector<tkstring> loa_y = sy.tokenize("e");
489 sy_dec = loa_y.front();
490 sy_exp = loa_y.back();
491
492 y_dec = sy_dec.atof();
493 y_exp = sy_exp.atoi();
494
495 //Recup de la valeur ey
496 std::vector<tkstring> loa_ey = sey.tokenize("e");
497
498 sey_dec = loa_ey.front();
499 sey_exp = loa_ey.back();
500
501 ey_dec = sey_dec.atof();
502 ey_exp = sey_exp.atoi();
503
504 double err = ey_dec * pow(10., ey_exp - y_exp);
505 tkstring s;
506
507 if (!Form("%1.2g", y_dec).contains(".") && err >= 1) {
508
509 if (!Form("%1.2g", err).contains(".")) {
510 if (y_exp == ey_exp) s = Form("%1.2g.0(%g.0).10$^{%d}$", y_dec, ey_dec, y_exp);
511 else s = Form("%1.3g.0(%g.0).10$^{%d}$", y_dec, err, y_exp);
512 } else if (Form("%1.2g", err) == Form("%1.1g", err) && Form("%1.2g", err).contains(".")) {
513 if (y_exp == ey_exp) s = Form("%1.2g.0(%g0).10$^{%d}$", y_dec, ey_dec, y_exp);
514 else s = Form("%1.3g.0(%g0).10$^{%d}$", y_dec, err, y_exp);
515 } else {
516 if (y_exp == ey_exp) s = Form("%1.2g.0(%g).10$^{%d}$", y_dec, ey_dec, y_exp);
517 else s = Form("%1.3g.0(%g).10$^{%d}$", y_dec, err, y_exp);
518 }
519 } else if (Form("%1.3g", y_dec) == Form("%1.2g", y_dec) && Form("%1.2g", y_dec).contains(".") && err < 1) {
520 if (!Form("%1.2g", err).contains(".")) {
521 if (y_exp == ey_exp) s = Form("%1.2g0(%g.0).10$^{%d}$", y_dec, ey_dec, y_exp);
522 else s = Form("%1.3g0(%g.0).10$^{%d}$", y_dec, err, y_exp);
523 } else if (Form("%1.2g", err) == Form("%1.1g", err) && Form("%1.2g", err).contains(".")) {
524 if (y_exp == ey_exp) s = Form("%1.2g0(%g0).10$^{%d}$", y_dec, ey_dec, y_exp);
525 else s = Form("%1.3g0(%g0).10$^{%d}$", y_dec, err, y_exp);
526 } else {
527 if (y_exp == ey_exp) s = Form("%1.2g0(%g).10$^{%d}$", y_dec, ey_dec, y_exp);
528 else s = Form("%1.3g0(%g).10$^{%d}$", y_dec, err, y_exp);
529 }
530 } else if (!Form("%1.2g", err).contains(".")) {
531 if (y_exp == ey_exp) s = Form("%1.2g(%g.0).10$^{%d}$", y_dec, ey_dec, y_exp);
532 else s = Form("%1.3g(%g.0).10$^{%d}$", y_dec, err, y_exp);
533 } else if (Form("%1.2g", err) == Form("%1.1g", err) && Form("%1.2g", err).contains(".")) {
534 if (y_exp == ey_exp) s = Form("%1.2g(%g0).10$^{%d}$", y_dec, ey_dec, y_exp);
535 else s = Form("%1.3g(%g0).10$^{%d}$", y_dec, err, y_exp);
536 } else {
537 if (y_exp == ey_exp) s = Form("%1.2g(%g).10$^{%d}$", y_dec, ey_dec, y_exp);
538 else s = Form("%1.3g(%g).10$^{%d}$", y_dec, err, y_exp);;
539 }
540
541 s.replace_all(".10$^{0}$", "");
542 s.replace_all("0)", ")");
543
544 *this = s;
545}
546
548{
549 // Remove any superfluous whitespace (or tabs or newlines) from string (modify string)
550 // i.e. transform " Mary Had\tA Little \n Laaaaaaaaaaaaaaaaaamb"
551 // into "Mary Had A Little Lamb"
552
553 tkstring tmp = *this;
554 tkstring tmp2;
555
556 for(const auto &s: tmp.tokenize(" \n\t")) {
557 if (tmp2.length()) tmp2 += " ";
558 tmp2 += s;
559 }
560
561 if(tmp2.begins_with(' ')) tmp2.erase(1);
562
563 *this = tmp2;
564
565 return *this;
566}
567
568int tkstring::count_string(const tkstring &_st) const
569{
570 int count = 0;
571 size_t pos=0;
572
573 while ((pos = find(_st,pos)) != npos) {
574 pos += _st.length();
575 count++;
576 }
577
578 return count;
579}
580
582{
583 // Remove any superfluous whitespace (or tabs or newlines) from string (does not modify string)
584 // i.e. transform " Mary Had\tA Little \n Laaaaaaaaaaaaaaaaaamb"
585 // into "Mary Had A Little Lamb"
586
587 tkstring tmp = *this;
588 tkstring tmp2;
589
590 for(const auto &s: tmp.tokenize(" \n\t")) {
591 if (tmp2.length()) tmp2 += " ";
592 tmp2 += s;
593 }
594
595 if(tmp2.begins_with(' ')) tmp2.erase(1);
596
597 return tmp2;
598}
599
600bool tkstring::match(const char *_pattern) const
601{
602 // Check if pattern fit the considered string
603 // As in ls shell command the * symbol represents the non discriminant part
604 // of the pattern
605 // if no * is present in the pattern, the result correspond to TString::Contains method
606 // Example KVString st(file_R45.dat);
607 // st.Match("*") -> kTRUE
608 // st.Match("file") ->kTRUE
609 // st.Match("*file*R*") ->kTRUE
610 // etc ....
611
612 tkstring pat(_pattern);
613
614 if (!pat.contains("*")) return this->contains(pat);
615 if (pat == "*") return true;
616
617 std::vector<tkstring> tok = pat.tokenize("*");
618 int n_tok = tok.size();
619 if (!pat.begins_with("*"))
620 if (!begins_with(tok.front())) {
621 return false;
622 }
623 if (!pat.ends_with("*"))
624 if (!ends_with(tok.back())) {
625 return false;
626 }
627
628 int idx = 0, num = 0;
629 for (int ii = 0; ii < n_tok; ii += 1) {
630 idx = index(tok.at(ii), idx);
631 if (idx != -1) {
632 num += 1;
633 idx++;
634 } else break;
635 }
636 return (num == n_tok);
637}
638
644std::istream& tkstring::read_line(std::istream& _strm, bool _skip_white)
645{
646 if(_skip_white) getline(_strm >> std::ws, *this);
647 else getline(_strm, *this);
648
649 return _strm;
650}
651
653{
654 if(error.is_empty()) return -1.;
655 return error.atof()*tkstring::get_precision(val);
656}
657
671{
672 double precision = 1.0; int expo; size_t l1,l2;
673
674 l1 = st.index(".",0,tkstring::kIgnoreCase);
675 l2 = st.index("e",0,tkstring::kIgnoreCase);
676
677 if ( l1 == std::string::npos ) { // no point
678 if ( l2 == std::string::npos ) // no exponant
679 precision = 1.0;
680 else { // exponant
681 st.erase(0,l2+1);
682 expo = st.atoi();
683 precision = pow(10.,expo);
684 }
685 } else { // one point
686 if ( l2 == std::string::npos ) { // no exponant
687 expo = - (st.size() - l1 - 1);
688 precision = pow(10.0,expo);
689 }
690 else { // exponant
691 st.erase(0,l2+1);
692 expo = st.atoi();
693 expo = - (l2 - l1 - 1) + expo;
694 precision = pow(10.0,expo);
695 }
696 }
697 if ( precision < 0 ) precision = -1.0 * precision;
698
699 return precision;
700}
701
702#ifdef HAS_ROOT
703ClassImp(tkstring);
704#endif
std::string with useful tricks from TString (ROOT) and KVString (KaliVeda) and more....
Definition tkstring.h:32
tkstring extract_alpha()
Returns a tkstring composed only of the alphabetic letters of the original tkstring.
Definition tkstring.cpp:408
tkstring strip_all_extra_white_space() const
Definition tkstring.cpp:581
tkstring copy() const
Returns a copy of this string.
Definition tkstring.cpp:391
tkstring & to_lower()
Change all letters to lower case.
Definition tkstring.cpp:80
static const char * form(const char *_format,...)
Definition tkstring.cpp:452
bool is_float() const
Checks if string contains a floating point or integer number.
Definition tkstring.cpp:131
tkstring get_last_occurence(const char *_s1)
Definition tkstring.cpp:350
std::vector< tkstring > tokenize(const tkstring &_delim=" ") const
Create a vector of string separated by at least one delimiter.
Definition tkstring.cpp:275
static tkstring Form(const char *_format,...)
Definition tkstring.cpp:366
tkstring substr(size_type __pos=0, size_type __n=npos) const
Inlines.
Definition tkstring.h:157
std::vector< tkstring > tokenize_from_string(const tkstring &_delim) const
Create a vector of string separated by a full string as delimiter.
Definition tkstring.cpp:315
bool match(const char *_pattern) const
Definition tkstring.cpp:600
bool is_alpha() const
Checks whether tkstring is only composed of alphabetic letters.
Definition tkstring.cpp:441
std::istream & read_line(std::istream &_strm, bool _skip_white=true)
tkstring::read_line
Definition tkstring.cpp:644
int atoi() const
Converts a string to integer value.
Definition tkstring.cpp:196
bool ends_with(const char *_s, ECaseCompare _cmp=kExact) const
Definition tkstring.cpp:262
static double get_absolute_error(tkstring val, tkstring error)
Get absolute uncertainty from value and error strings (1.27 4 -> 0.04), returns -1 in case of empty e...
Definition tkstring.cpp:652
bool equal_to(const char *_s, ECaseCompare _cmp=kExact) const
Returns true if the string and _s are identical.
Definition tkstring.cpp:255
size_t index(const char *_s, size_t _pos=0, ECaseCompare _cmp=kExact) const
Returns the index of the substring _s.
Definition tkstring.cpp:239
tkstring remove_alpha()
Returns a tkstring composed only of the non alphabetic letters of the original tkstring.
Definition tkstring.cpp:422
tkstring & remove_all_extra_white_space()
Definition tkstring.cpp:547
bool contains(const char *_pat, ECaseCompare _cmp=kExact) const
Definition tkstring.h:175
int count_string(const tkstring &_st) const
Definition tkstring.cpp:568
bool begins_with(const char *_s, ECaseCompare _cmp=kExact) const
Definition tkstring.h:163
tkstring & capitalize()
Change first letter of string from lower to upper case.
Definition tkstring.cpp:397
static double get_precision(tkstring _st)
Extract the precision for a given ENSDF data.
Definition tkstring.cpp:670
tkstring & replace_all(const tkstring &_s1, const tkstring &_s2)
Definition tkstring.h:181
bool is_digit() const
Checks if all characters in string are digits (0-9) or whitespaces.
Definition tkstring.cpp:102
tkstring remove_last_occurence(const char *_s1)
Definition tkstring.cpp:358
double atof() const
Converts a string to double value.
Definition tkstring.cpp:212
tkstring & to_upper()
Change all letters to upper case.
Definition tkstring.cpp:86
Definition tklog.cpp:16
std::string wrap_text(const tkstring &_text, size_t _first_content_col, size_t _continuation_col, size_t _max_line_width=80)
Definition tkstring.cpp:37
tklog & error(tklog &log)
Definition tklog.h:344