001/*
002 * Copyright 2001-2006 Geert Bevin <gbevin[remove] at uwyn dot com>
003 * Distributed under the terms of either:
004 * - the common development and distribution license (CDDL), v1.0; or
005 * - the GNU Lesser General Public License, v2.1 or later
006 * $Id: StringUtils.java 3106 2006-03-13 17:53:50Z gbevin $
007 */
008package com.uwyn.jhighlight.tools;
009
010import com.uwyn.jhighlight.pcj.map.CharKeyOpenHashMap;
011import java.util.ArrayList;
012import java.util.Iterator;
013import java.util.regex.Pattern;
014
015/**
016 * General purpose class containing common <code>String</code> manipulation
017 * methods.
018 *
019 * @author Geert Bevin (gbevin[remove] at uwyn dot com)
020 * @version $Revision: 3106 $
021 * @since 1.0
022 */
023public abstract class StringUtils
024{
025        private static final CharKeyOpenHashMap mHtmlEncodeMap = new CharKeyOpenHashMap();
026        
027        static
028        {
029                // Html encoding mapping according to the HTML 4.0 spec
030                // http://www.w3.org/TR/REC-html40/sgml/entities.html
031                
032                // Special characters for HTML
033                mHtmlEncodeMap.put('\u0026', "&amp;");
034                mHtmlEncodeMap.put('\u003C', "&lt;");
035                mHtmlEncodeMap.put('\u003E', "&gt;");
036                mHtmlEncodeMap.put('\u0022', "&quot;");
037                
038                mHtmlEncodeMap.put('\u0152', "&OElig;");
039                mHtmlEncodeMap.put('\u0153', "&oelig;");
040                mHtmlEncodeMap.put('\u0160', "&Scaron;");
041                mHtmlEncodeMap.put('\u0161', "&scaron;");
042                mHtmlEncodeMap.put('\u0178', "&Yuml;");
043                mHtmlEncodeMap.put('\u02C6', "&circ;");
044                mHtmlEncodeMap.put('\u02DC', "&tilde;");
045                mHtmlEncodeMap.put('\u2002', "&ensp;");
046                mHtmlEncodeMap.put('\u2003', "&emsp;");
047                mHtmlEncodeMap.put('\u2009', "&thinsp;");
048                mHtmlEncodeMap.put('\u200C', "&zwnj;");
049                mHtmlEncodeMap.put('\u200D', "&zwj;");
050                mHtmlEncodeMap.put('\u200E', "&lrm;");
051                mHtmlEncodeMap.put('\u200F', "&rlm;");
052                mHtmlEncodeMap.put('\u2013', "&ndash;");
053                mHtmlEncodeMap.put('\u2014', "&mdash;");
054                mHtmlEncodeMap.put('\u2018', "&lsquo;");
055                mHtmlEncodeMap.put('\u2019', "&rsquo;");
056                mHtmlEncodeMap.put('\u201A', "&sbquo;");
057                mHtmlEncodeMap.put('\u201C', "&ldquo;");
058                mHtmlEncodeMap.put('\u201D', "&rdquo;");
059                mHtmlEncodeMap.put('\u201E', "&bdquo;");
060                mHtmlEncodeMap.put('\u2020', "&dagger;");
061                mHtmlEncodeMap.put('\u2021', "&Dagger;");
062                mHtmlEncodeMap.put('\u2030', "&permil;");
063                mHtmlEncodeMap.put('\u2039', "&lsaquo;");
064                mHtmlEncodeMap.put('\u203A', "&rsaquo;");
065                mHtmlEncodeMap.put('\u20AC', "&euro;");
066                
067                // Character entity references for ISO 8859-1 characters
068                mHtmlEncodeMap.put('\u00A0', "&nbsp;");
069                mHtmlEncodeMap.put('\u00A1', "&iexcl;");
070                mHtmlEncodeMap.put('\u00A2', "&cent;");
071                mHtmlEncodeMap.put('\u00A3', "&pound;");
072                mHtmlEncodeMap.put('\u00A4', "&curren;");
073                mHtmlEncodeMap.put('\u00A5', "&yen;");
074                mHtmlEncodeMap.put('\u00A6', "&brvbar;");
075                mHtmlEncodeMap.put('\u00A7', "&sect;");
076                mHtmlEncodeMap.put('\u00A8', "&uml;");
077                mHtmlEncodeMap.put('\u00A9', "&copy;");
078                mHtmlEncodeMap.put('\u00AA', "&ordf;");
079                mHtmlEncodeMap.put('\u00AB', "&laquo;");
080                mHtmlEncodeMap.put('\u00AC', "&not;");
081                mHtmlEncodeMap.put('\u00AD', "&shy;");
082                mHtmlEncodeMap.put('\u00AE', "&reg;");
083                mHtmlEncodeMap.put('\u00AF', "&macr;");
084                mHtmlEncodeMap.put('\u00B0', "&deg;");
085                mHtmlEncodeMap.put('\u00B1', "&plusmn;");
086                mHtmlEncodeMap.put('\u00B2', "&sup2;");
087                mHtmlEncodeMap.put('\u00B3', "&sup3;");
088                mHtmlEncodeMap.put('\u00B4', "&acute;");
089                mHtmlEncodeMap.put('\u00B5', "&micro;");
090                mHtmlEncodeMap.put('\u00B6', "&para;");
091                mHtmlEncodeMap.put('\u00B7', "&middot;");
092                mHtmlEncodeMap.put('\u00B8', "&cedil;");
093                mHtmlEncodeMap.put('\u00B9', "&sup1;");
094                mHtmlEncodeMap.put('\u00BA', "&ordm;");
095                mHtmlEncodeMap.put('\u00BB', "&raquo;");
096                mHtmlEncodeMap.put('\u00BC', "&frac14;");
097                mHtmlEncodeMap.put('\u00BD', "&frac12;");
098                mHtmlEncodeMap.put('\u00BE', "&frac34;");
099                mHtmlEncodeMap.put('\u00BF', "&iquest;");
100                mHtmlEncodeMap.put('\u00C0', "&Agrave;");
101                mHtmlEncodeMap.put('\u00C1', "&Aacute;");
102                mHtmlEncodeMap.put('\u00C2', "&Acirc;");
103                mHtmlEncodeMap.put('\u00C3', "&Atilde;");
104                mHtmlEncodeMap.put('\u00C4', "&Auml;");
105                mHtmlEncodeMap.put('\u00C5', "&Aring;");
106                mHtmlEncodeMap.put('\u00C6', "&AElig;");
107                mHtmlEncodeMap.put('\u00C7', "&Ccedil;");
108                mHtmlEncodeMap.put('\u00C8', "&Egrave;");
109                mHtmlEncodeMap.put('\u00C9', "&Eacute;");
110                mHtmlEncodeMap.put('\u00CA', "&Ecirc;");
111                mHtmlEncodeMap.put('\u00CB', "&Euml;");
112                mHtmlEncodeMap.put('\u00CC', "&Igrave;");
113                mHtmlEncodeMap.put('\u00CD', "&Iacute;");
114                mHtmlEncodeMap.put('\u00CE', "&Icirc;");
115                mHtmlEncodeMap.put('\u00CF', "&Iuml;");
116                mHtmlEncodeMap.put('\u00D0', "&ETH;");
117                mHtmlEncodeMap.put('\u00D1', "&Ntilde;");
118                mHtmlEncodeMap.put('\u00D2', "&Ograve;");
119                mHtmlEncodeMap.put('\u00D3', "&Oacute;");
120                mHtmlEncodeMap.put('\u00D4', "&Ocirc;");
121                mHtmlEncodeMap.put('\u00D5', "&Otilde;");
122                mHtmlEncodeMap.put('\u00D6', "&Ouml;");
123                mHtmlEncodeMap.put('\u00D7', "&times;");
124                mHtmlEncodeMap.put('\u00D8', "&Oslash;");
125                mHtmlEncodeMap.put('\u00D9', "&Ugrave;");
126                mHtmlEncodeMap.put('\u00DA', "&Uacute;");
127                mHtmlEncodeMap.put('\u00DB', "&Ucirc;");
128                mHtmlEncodeMap.put('\u00DC', "&Uuml;");
129                mHtmlEncodeMap.put('\u00DD', "&Yacute;");
130                mHtmlEncodeMap.put('\u00DE', "&THORN;");
131                mHtmlEncodeMap.put('\u00DF', "&szlig;");
132                mHtmlEncodeMap.put('\u00E0', "&agrave;");
133                mHtmlEncodeMap.put('\u00E1', "&aacute;");
134                mHtmlEncodeMap.put('\u00E2', "&acirc;");
135                mHtmlEncodeMap.put('\u00E3', "&atilde;");
136                mHtmlEncodeMap.put('\u00E4', "&auml;");
137                mHtmlEncodeMap.put('\u00E5', "&aring;");
138                mHtmlEncodeMap.put('\u00E6', "&aelig;");
139                mHtmlEncodeMap.put('\u00E7', "&ccedil;");
140                mHtmlEncodeMap.put('\u00E8', "&egrave;");
141                mHtmlEncodeMap.put('\u00E9', "&eacute;");
142                mHtmlEncodeMap.put('\u00EA', "&ecirc;");
143                mHtmlEncodeMap.put('\u00EB', "&euml;");
144                mHtmlEncodeMap.put('\u00EC', "&igrave;");
145                mHtmlEncodeMap.put('\u00ED', "&iacute;");
146                mHtmlEncodeMap.put('\u00EE', "&icirc;");
147                mHtmlEncodeMap.put('\u00EF', "&iuml;");
148                mHtmlEncodeMap.put('\u00F0', "&eth;");
149                mHtmlEncodeMap.put('\u00F1', "&ntilde;");
150                mHtmlEncodeMap.put('\u00F2', "&ograve;");
151                mHtmlEncodeMap.put('\u00F3', "&oacute;");
152                mHtmlEncodeMap.put('\u00F4', "&ocirc;");
153                mHtmlEncodeMap.put('\u00F5', "&otilde;");
154                mHtmlEncodeMap.put('\u00F6', "&ouml;");
155                mHtmlEncodeMap.put('\u00F7', "&divide;");
156                mHtmlEncodeMap.put('\u00F8', "&oslash;");
157                mHtmlEncodeMap.put('\u00F9', "&ugrave;");
158                mHtmlEncodeMap.put('\u00FA', "&uacute;");
159                mHtmlEncodeMap.put('\u00FB', "&ucirc;");
160                mHtmlEncodeMap.put('\u00FC', "&uuml;");
161                mHtmlEncodeMap.put('\u00FD', "&yacute;");
162                mHtmlEncodeMap.put('\u00FE', "&thorn;");
163                mHtmlEncodeMap.put('\u00FF', "&yuml;");
164                
165                // Mathematical, Greek and Symbolic characters for HTML
166                mHtmlEncodeMap.put('\u0192', "&fnof;");
167                mHtmlEncodeMap.put('\u0391', "&Alpha;");
168                mHtmlEncodeMap.put('\u0392', "&Beta;");
169                mHtmlEncodeMap.put('\u0393', "&Gamma;");
170                mHtmlEncodeMap.put('\u0394', "&Delta;");
171                mHtmlEncodeMap.put('\u0395', "&Epsilon;");
172                mHtmlEncodeMap.put('\u0396', "&Zeta;");
173                mHtmlEncodeMap.put('\u0397', "&Eta;");
174                mHtmlEncodeMap.put('\u0398', "&Theta;");
175                mHtmlEncodeMap.put('\u0399', "&Iota;");
176                mHtmlEncodeMap.put('\u039A', "&Kappa;");
177                mHtmlEncodeMap.put('\u039B', "&Lambda;");
178                mHtmlEncodeMap.put('\u039C', "&Mu;");
179                mHtmlEncodeMap.put('\u039D', "&Nu;");
180                mHtmlEncodeMap.put('\u039E', "&Xi;");
181                mHtmlEncodeMap.put('\u039F', "&Omicron;");
182                mHtmlEncodeMap.put('\u03A0', "&Pi;");
183                mHtmlEncodeMap.put('\u03A1', "&Rho;");
184                mHtmlEncodeMap.put('\u03A3', "&Sigma;");
185                mHtmlEncodeMap.put('\u03A4', "&Tau;");
186                mHtmlEncodeMap.put('\u03A5', "&Upsilon;");
187                mHtmlEncodeMap.put('\u03A6', "&Phi;");
188                mHtmlEncodeMap.put('\u03A7', "&Chi;");
189                mHtmlEncodeMap.put('\u03A8', "&Psi;");
190                mHtmlEncodeMap.put('\u03A9', "&Omega;");
191                mHtmlEncodeMap.put('\u03B1', "&alpha;");
192                mHtmlEncodeMap.put('\u03B2', "&beta;");
193                mHtmlEncodeMap.put('\u03B3', "&gamma;");
194                mHtmlEncodeMap.put('\u03B4', "&delta;");
195                mHtmlEncodeMap.put('\u03B5', "&epsilon;");
196                mHtmlEncodeMap.put('\u03B6', "&zeta;");
197                mHtmlEncodeMap.put('\u03B7', "&eta;");
198                mHtmlEncodeMap.put('\u03B8', "&theta;");
199                mHtmlEncodeMap.put('\u03B9', "&iota;");
200                mHtmlEncodeMap.put('\u03BA', "&kappa;");
201                mHtmlEncodeMap.put('\u03BB', "&lambda;");
202                mHtmlEncodeMap.put('\u03BC', "&mu;");
203                mHtmlEncodeMap.put('\u03BD', "&nu;");
204                mHtmlEncodeMap.put('\u03BE', "&xi;");
205                mHtmlEncodeMap.put('\u03BF', "&omicron;");
206                mHtmlEncodeMap.put('\u03C0', "&pi;");
207                mHtmlEncodeMap.put('\u03C1', "&rho;");
208                mHtmlEncodeMap.put('\u03C2', "&sigmaf;");
209                mHtmlEncodeMap.put('\u03C3', "&sigma;");
210                mHtmlEncodeMap.put('\u03C4', "&tau;");
211                mHtmlEncodeMap.put('\u03C5', "&upsilon;");
212                mHtmlEncodeMap.put('\u03C6', "&phi;");
213                mHtmlEncodeMap.put('\u03C7', "&chi;");
214                mHtmlEncodeMap.put('\u03C8', "&psi;");
215                mHtmlEncodeMap.put('\u03C9', "&omega;");
216                mHtmlEncodeMap.put('\u03D1', "&thetasym;");
217                mHtmlEncodeMap.put('\u03D2', "&upsih;");
218                mHtmlEncodeMap.put('\u03D6', "&piv;");
219                mHtmlEncodeMap.put('\u2022', "&bull;");
220                mHtmlEncodeMap.put('\u2026', "&hellip;");
221                mHtmlEncodeMap.put('\u2032', "&prime;");
222                mHtmlEncodeMap.put('\u2033', "&Prime;");
223                mHtmlEncodeMap.put('\u203E', "&oline;");
224                mHtmlEncodeMap.put('\u2044', "&frasl;");
225                mHtmlEncodeMap.put('\u2118', "&weierp;");
226                mHtmlEncodeMap.put('\u2111', "&image;");
227                mHtmlEncodeMap.put('\u211C', "&real;");
228                mHtmlEncodeMap.put('\u2122', "&trade;");
229                mHtmlEncodeMap.put('\u2135', "&alefsym;");
230                mHtmlEncodeMap.put('\u2190', "&larr;");
231                mHtmlEncodeMap.put('\u2191', "&uarr;");
232                mHtmlEncodeMap.put('\u2192', "&rarr;");
233                mHtmlEncodeMap.put('\u2193', "&darr;");
234                mHtmlEncodeMap.put('\u2194', "&harr;");
235                mHtmlEncodeMap.put('\u21B5', "&crarr;");
236                mHtmlEncodeMap.put('\u21D0', "&lArr;");
237                mHtmlEncodeMap.put('\u21D1', "&uArr;");
238                mHtmlEncodeMap.put('\u21D2', "&rArr;");
239                mHtmlEncodeMap.put('\u21D3', "&dArr;");
240                mHtmlEncodeMap.put('\u21D4', "&hArr;");
241                mHtmlEncodeMap.put('\u2200', "&forall;");
242                mHtmlEncodeMap.put('\u2202', "&part;");
243                mHtmlEncodeMap.put('\u2203', "&exist;");
244                mHtmlEncodeMap.put('\u2205', "&empty;");
245                mHtmlEncodeMap.put('\u2207', "&nabla;");
246                mHtmlEncodeMap.put('\u2208', "&isin;");
247                mHtmlEncodeMap.put('\u2209', "&notin;");
248                mHtmlEncodeMap.put('\u220B', "&ni;");
249                mHtmlEncodeMap.put('\u220F', "&prod;");
250                mHtmlEncodeMap.put('\u2211', "&sum;");
251                mHtmlEncodeMap.put('\u2212', "&minus;");
252                mHtmlEncodeMap.put('\u2217', "&lowast;");
253                mHtmlEncodeMap.put('\u221A', "&radic;");
254                mHtmlEncodeMap.put('\u221D', "&prop;");
255                mHtmlEncodeMap.put('\u221E', "&infin;");
256                mHtmlEncodeMap.put('\u2220', "&ang;");
257                mHtmlEncodeMap.put('\u2227', "&and;");
258                mHtmlEncodeMap.put('\u2228', "&or;");
259                mHtmlEncodeMap.put('\u2229', "&cap;");
260                mHtmlEncodeMap.put('\u222A', "&cup;");
261                mHtmlEncodeMap.put('\u222B', "&int;");
262                mHtmlEncodeMap.put('\u2234', "&there4;");
263                mHtmlEncodeMap.put('\u223C', "&sim;");
264                mHtmlEncodeMap.put('\u2245', "&cong;");
265                mHtmlEncodeMap.put('\u2248', "&asymp;");
266                mHtmlEncodeMap.put('\u2260', "&ne;");
267                mHtmlEncodeMap.put('\u2261', "&equiv;");
268                mHtmlEncodeMap.put('\u2264', "&le;");
269                mHtmlEncodeMap.put('\u2265', "&ge;");
270                mHtmlEncodeMap.put('\u2282', "&sub;");
271                mHtmlEncodeMap.put('\u2283', "&sup;");
272                mHtmlEncodeMap.put('\u2284', "&nsub;");
273                mHtmlEncodeMap.put('\u2286', "&sube;");
274                mHtmlEncodeMap.put('\u2287', "&supe;");
275                mHtmlEncodeMap.put('\u2295', "&oplus;");
276                mHtmlEncodeMap.put('\u2297', "&otimes;");
277                mHtmlEncodeMap.put('\u22A5', "&perp;");
278                mHtmlEncodeMap.put('\u22C5', "&sdot;");
279                mHtmlEncodeMap.put('\u2308', "&lceil;");
280                mHtmlEncodeMap.put('\u2309', "&rceil;");
281                mHtmlEncodeMap.put('\u230A', "&lfloor;");
282                mHtmlEncodeMap.put('\u230B', "&rfloor;");
283                mHtmlEncodeMap.put('\u2329', "&lang;");
284                mHtmlEncodeMap.put('\u232A', "&rang;");
285                mHtmlEncodeMap.put('\u25CA', "&loz;");
286                mHtmlEncodeMap.put('\u2660', "&spades;");
287                mHtmlEncodeMap.put('\u2663', "&clubs;");
288                mHtmlEncodeMap.put('\u2665', "&hearts;");
289                mHtmlEncodeMap.put('\u2666', "&diams;");
290        }
291        
292        private StringUtils()
293        {
294        }
295        
296        /**
297         * Transforms a provided <code>String</code> object into a new string,
298         * containing only valid Html characters.
299         *
300         * @param source The string that has to be transformed into a valid Html
301         * string.
302         *
303         * @return The encoded <code>String</code> object.
304         *
305         * @since 1.0
306         */
307        public static String encodeHtml(String source)
308        {
309                return encode(source, mHtmlEncodeMap);
310        }
311        
312        /**
313         * Transforms a provided <code>String</code> object into a new string,
314         * using the mapping that are provided through the supplied encoding table.
315         *
316         * @param source The string that has to be transformed into a valid string,
317         * using the mappings that are provided through the supplied encoding table.
318         * @param encodingTables A <code>Map</code> object containing the mappings to
319         * transform characters into valid entities. The keys of this map should be
320         * <code>Character</code> objects and the values <code>String</code>
321         * objects.
322         *
323         * @return The encoded <code>String</code> object.
324         *
325         * @since 1.0
326         */
327        private static String encode(String source, CharKeyOpenHashMap encodingTable)
328        {
329                if (null == source)
330                {
331                        return null;
332                }
333                
334                if (null == encodingTable)
335                {
336                        return source;
337                }
338                
339                StringBuffer    encoded_string = null;
340                char[]                  string_to_encode_array = source.toCharArray();
341                int                             last_match = -1;
342                int                             difference = 0;
343                
344                for (int i = 0; i < string_to_encode_array.length; i++)
345                {
346                        char char_to_encode = string_to_encode_array[i];
347                        
348                        if (encodingTable.containsKey(char_to_encode))
349                        {
350                                if (null == encoded_string)
351                                {
352                                        encoded_string = new StringBuffer(source.length());
353                                }
354                                difference = i - (last_match + 1);
355                                if (difference > 0)
356                                {
357                                        encoded_string.append(string_to_encode_array, last_match + 1, difference);
358                                }
359                                encoded_string.append(encodingTable.get(char_to_encode));
360                                last_match = i;
361                        }
362                }
363                
364                if (null == encoded_string)
365                {
366                        return source;
367                }
368                else
369                {
370                        difference = string_to_encode_array.length - (last_match + 1);
371                        if (difference > 0)
372                        {
373                                encoded_string.append(string_to_encode_array, last_match + 1, difference);
374                        }
375                        return encoded_string.toString();
376                }
377        }
378        
379        /**
380         * Checks if the name filters through an including and an excluding
381         * regular expression.
382         *
383         * @param name The <code>String</code> that will be filtered.
384         * @param included The regular expressions that needs to succeed
385         * @param excluded The regular expressions that needs to fail
386         *
387         * @return <code>true</code> if the name filtered through correctly; or
388         * <p>
389         * <code>false</code> otherwise.
390         *
391         * @since 1.0
392         */
393        public static boolean filter(String name, Pattern included, Pattern excluded)
394        {
395                Pattern[] included_array = null;
396                if (included != null)
397                {
398                        included_array = new Pattern[] {included};
399                }
400                
401                Pattern[] excluded_array = null;
402                if (excluded != null)
403                {
404                        excluded_array = new Pattern[] {excluded};
405                }
406                
407                return filter(name, included_array, excluded_array);
408        }
409        
410        /**
411         * Checks if the name filters through a series of including and excluding
412         * regular expressions.
413         *
414         * @param name The <code>String</code> that will be filtered.
415         * @param included An array of regular expressions that need to succeed
416         * @param excluded An array of regular expressions that need to fail
417         *
418         * @return <code>true</code> if the name filtered through correctly; or
419         * <p>
420         * <code>false</code> otherwise.
421         *
422         * @since 1.0
423         */
424        public static boolean filter(String name, Pattern[] included, Pattern[] excluded)
425        {
426                if (null == name)
427                {
428                        return false;
429                }
430                
431                boolean accepted = false;
432                
433                // retain only the includes
434                if (null == included)
435                {
436                        accepted = true;
437                }
438                else
439                {
440                        Pattern pattern;
441                        for (int i = 0; i < included.length; i++)
442                        {
443                                pattern = included[i];
444                                
445                                if (pattern != null &&
446                                        pattern.matcher(name).matches())
447                                {
448                                        accepted = true;
449                                        break;
450                                }
451                        }
452                }
453                
454                // remove the excludes
455                if (accepted &&
456                        excluded != null)
457                {
458                        Pattern pattern;
459                        for (int i = 0; i < excluded.length; i++)
460                        {
461                                pattern = excluded[i];
462                                
463                                if (pattern != null &&
464                                        pattern.matcher(name).matches())
465                                {
466                                        accepted = false;
467                                        break;
468                                }
469                        }
470                }
471                
472                return accepted;
473        }
474        
475        /**
476         * Splits a string into different parts, using a seperator string to detect
477         * the seperation boundaries in a case-sensitive manner. The seperator will
478         * not be included in the list of parts.
479         *
480         * @param source The string that will be split into parts.
481         * @param seperator The seperator string that will be used to determine the
482         * parts.
483         *
484         * @return An <code>ArrayList</code> containing the parts as
485         * <code>String</code> objects.
486         *
487         * @since 1.0
488         */
489        public static ArrayList split(String source, String seperator)
490        {
491                return split(source, seperator, true);
492        }
493        
494        /**
495         * Splits a string into different parts, using a seperator string to detect
496         * the seperation boundaries. The seperator will not be included in the list
497         * of parts.
498         *
499         * @param source The string that will be split into parts.
500         * @param seperator The seperator string that will be used to determine the
501         * parts.
502         * @param matchCase A <code>boolean</code> indicating if the match is going
503         * to be performed in a case-sensitive manner or not.
504         *
505         * @return An <code>ArrayList</code> containing the parts as
506         * <code>String</code> objects.
507         *
508         * @since 1.0
509         */
510        public static ArrayList split(String source, String seperator, boolean matchCase)
511        {
512                ArrayList       substrings = new ArrayList();
513                
514                if (null == source)
515                {
516                        return substrings;
517                }
518                
519                if (null == seperator)
520                {
521                        substrings.add(source);
522                        return substrings;
523                }
524                
525                int             current_index = 0;
526                int             delimiter_index = 0;
527                String  element = null;
528                
529                String  source_lookup_reference = null;
530                if (!matchCase)
531                {
532                        source_lookup_reference = source.toLowerCase();
533                        seperator = seperator.toLowerCase();
534                }
535                else
536                {
537                        source_lookup_reference = source;
538                }
539                
540                while (current_index <= source_lookup_reference.length())
541                {
542                        delimiter_index = source_lookup_reference.indexOf(seperator, current_index);
543                        
544                        if (-1 == delimiter_index)
545                        {
546                                element = new String(source.substring(current_index, source.length()));
547                                substrings.add(element);
548                                current_index = source.length() + 1;
549                        }
550                        else
551                        {
552                                element = new String(source.substring(current_index, delimiter_index));
553                                substrings.add(element);
554                                current_index = delimiter_index + seperator.length();
555                        }
556                }
557                
558                return substrings;
559        }
560        
561        /**
562         * Searches for a string within a specified string in a case-sensitive
563         * manner and replaces every match with another string.
564         *
565         * @param source The string in which the matching parts will be replaced.
566         * @param stringToReplace The string that will be searched for.
567         * @param replacementString The string that will replace each matching part.
568         *
569         * @return A new <code>String</code> object containing the replacement
570         * result.
571         *
572         * @since 1.0
573         */
574        public static String replace(String source, String stringToReplace, String replacementString)
575        {
576                return replace(source, stringToReplace, replacementString, true);
577        }
578        
579        /**
580         * Searches for a string within a specified string and replaces every match
581         * with another string.
582         *
583         * @param source The string in which the matching parts will be replaced.
584         * @param stringToReplace The string that will be searched for.
585         * @param replacementString The string that will replace each matching part.
586         * @param matchCase A <code>boolean</code> indicating if the match is going
587         * to be performed in a case-sensitive manner or not.
588         *
589         * @return A new <code>String</code> object containing the replacement
590         * result.
591         *
592         * @since 1.0
593         */
594        public static String replace(String source, String stringToReplace, String replacementString, boolean matchCase)
595        {
596                if (null == source)
597                {
598                        return null;
599                }
600                
601                if (null == stringToReplace)
602                {
603                        return source;
604                }
605                
606                if (null == replacementString)
607                {
608                        return source;
609                }
610                
611                Iterator                string_parts = split(source, stringToReplace, matchCase).iterator();
612                StringBuffer    new_string = new StringBuffer();
613                
614                synchronized (new_string) // speed increase by thread lock pre-allocation
615                {
616                        while (string_parts.hasNext())
617                        {
618                                String string_part = (String)string_parts.next();
619                                new_string.append(string_part);
620                                if (string_parts.hasNext())
621                                {
622                                        new_string.append(replacementString);
623                                }
624                        }
625                        
626                        return new_string.toString();
627                }
628        }
629        
630        /**
631         * Creates a new string that contains the provided string a number of times.
632         *
633         * @param source The string that will be repeated.
634         * @param count  The number of times that the string will be repeated.
635         * @return A new <code>String</code> object containing the repeated
636         * concatenation result.
637         *
638         * @since 1.0
639         */
640        public static String repeat(String source, int count)
641        {
642                if (null == source)
643                {
644                        return null;
645                }
646                
647                StringBuffer new_string = new StringBuffer();
648                synchronized (new_string) // speed increase by thread lock pre-allocation
649                {
650                        while (count > 0)
651                        {
652                                new_string.append(source);
653                                count --;
654                        }
655                        
656                        return new_string.toString();
657                }
658        }
659        
660        /**
661         * Converts all tabs on a line to spaces according to the provided tab
662         * width.
663         *
664         * @param line The line whose tabs have to be converted.
665         * @param tabWidth The tab width.
666         * @return A new <code>String</code> object containing the line with the
667         * replaced tabs.
668         * @since 1.0
669         */
670        public static String convertTabsToSpaces(String line, int tabWidth)
671        {
672                StringBuffer result = new StringBuffer();
673                
674                synchronized (result) // speed increase by thread lock pre-allocation
675                {
676                        int tab_index = -1;
677                        int last_tab_index = 0;
678                        int added_chars = 0;
679                        int tab_size;
680                        while ((tab_index = line.indexOf("\t", last_tab_index)) != -1)
681                        {
682                                tab_size = tabWidth - ((tab_index + added_chars) % tabWidth);
683                                if (0 == tab_size)
684                                {
685                                        tab_size = tabWidth;
686                                }
687                                added_chars += tab_size - 1;
688                                result.append(line.substring(last_tab_index, tab_index));
689                                result.append(StringUtils.repeat(" ", tab_size));
690                                last_tab_index = tab_index + 1;
691                        }
692                        if (0 == last_tab_index)
693                        {
694                                return line;
695                        }
696                        else
697                        {
698                                result.append(line.substring(last_tab_index));
699                        }
700                }
701                
702                return result.toString();
703        }
704}
705
706