001/*
002 * Copyright 2010-2014 UnboundID Corp.
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2010-2014 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.util;
022
023
024
025import java.util.List;
026import java.util.ArrayList;
027import java.io.Serializable;
028
029import static com.unboundid.util.Validator.*;
030
031
032
033/**
034 * This class provides access to a form of a command-line argument that is
035 * safe to use in a shell.  It includes both forms for both Unix (bash shell
036 * specifically) and Windows, since there are differences between the two
037 * platforms.  Quoting of arguments is performed with the following goals:
038 *
039 * <UL>
040 *   <LI>The same form should be used for both Unix and Windows whenever
041 *       possible.</LI>
042 *   <LI>If the same form cannot be used for both platforms, then make it
043 *       as easy as possible to convert the form to the other platform.</LI>
044 *   <LI>If neither platform requires quoting of an argument, then it is not
045 *       quoted.</LI>
046 * </UL>
047 *
048 * To that end, here is the approach that we've taken:
049 *
050 * <UL>
051 *   <LI>Characters in the output are never escaped with the \ character
052 *       because Windows does not understand \ used to escape.</LI>
053 *   <LI>On Unix, double-quotes are used to quote whenever possible since
054 *       Windows does not treat single quotes specially.</LI>
055 *   <LI>If a String needs to be quoted on either platform, then it is quoted
056 *       on both.  If it needs to be quoted with single-quotes on Unix, then
057 *       it will be quoted with double quotes on Windows.
058 *   <LI>On Unix, single-quote presents a problem if it's included in a
059 *       string that needs to be singled-quoted, for instance one that includes
060 *       the $ or ! characters.  In this case, we have to wrap it in
061 *       double-quotes outside of the single-quotes.  For instance, Server's!
062 *       would end up as 'Server'"'"'s!'.</LI>
063 *   <LI>On Windows, double-quotes present a problem.  They have to be
064 *       escaped using two double-quotes inside of a double-quoted string.
065 *       For instance "Quoted" ends up as """Quoted""".</LI>
066 * </UL>
067 *
068 * All of the forms can be unambiguously parsed using the
069 * {@link #parseExampleCommandLine} method regardless of the platform.  This
070 * method can be used when needing to parse a command line that was generated
071 * by this class outside of a shell environment, e.g. if the full command line
072 * was read from a file.  Special characters that are escaped include |, &amp;, ;,
073 * (, ), !, ", ', *, ?, $, and `.
074 */
075@ThreadSafety(level = ThreadSafetyLevel.COMPLETELY_THREADSAFE)
076public final class ExampleCommandLineArgument implements Serializable
077{
078  private static final long serialVersionUID = 2468880329239320437L;
079
080  // The argument that was passed in originally.
081  private final String rawForm;
082
083  // The Unix form of the argument.
084  private final String unixForm;
085
086  // The Windows form of the argument.
087  private final String windowsForm;
088
089
090
091  /**
092   * Private constructor.
093   *
094   * @param  rawForm      The original raw form of the command line argument.
095   * @param  unixForm     The Unix form of the argument.
096   * @param  windowsForm  The Windows form of the argument.
097   */
098  private ExampleCommandLineArgument(final String rawForm,
099                                     final String unixForm,
100                                     final String windowsForm)
101  {
102    this.rawForm = rawForm;
103    this.unixForm     = unixForm;
104    this.windowsForm  = windowsForm;
105  }
106
107
108
109  /**
110   * Return the original, unquoted raw form of the argument.  This is what
111   * was passed into the {@link #getCleanArgument} method.
112   *
113   * @return  The original, unquoted form of the argument.
114   */
115  public String getRawForm()
116  {
117    return rawForm;
118  }
119
120
121
122  /**
123   * Return the form of the argument that is safe to use in a Unix command
124   * line shell.
125   *
126   * @return  The form of the argument that is safe to use in a Unix command
127   *          line shell.
128   */
129  public String getUnixForm()
130  {
131    return unixForm;
132  }
133
134
135
136  /**
137   * Return the form of the argument that is safe to use in a Windows command
138   * line shell.
139   *
140   * @return  The form of the argument that is safe to use in a Windows command
141   *          line shell.
142   */
143  public String getWindowsForm()
144  {
145    return windowsForm;
146  }
147
148
149
150  /**
151   * Return the form of the argument that is safe to use in the command line
152   * shell of the current operating system platform.
153   *
154   * @return  The form of the argument that is safe to use in a command line
155   *          shell of the current operating system platform.
156   */
157  public String getLocalForm()
158  {
159    if (StaticUtils.isWindows())
160    {
161      return getWindowsForm();
162    }
163    else
164    {
165      return getUnixForm();
166    }
167  }
168
169
170
171  /**
172   * Return a clean form of the specified argument that can be used directly
173   * on the command line.
174   *
175   * @param  argument  The raw argument to convert into a clean form that can
176   *                   be used directly on the command line.
177   *
178   * @return  The ExampleCommandLineArgument for the specified argument.
179   */
180  public static ExampleCommandLineArgument getCleanArgument(
181                                             final String argument)
182  {
183    return new ExampleCommandLineArgument(argument,
184                                          getUnixForm(argument),
185                                          getWindowsForm(argument));
186  }
187
188
189
190  /**
191   * Return a clean form of the specified argument that can be used directly
192   * on a Unix command line.
193   *
194   * @param  argument  The raw argument to convert into a clean form that can
195   *                   be used directly on the Unix command line.
196   *
197   * @return  A form of the specified argument that is clean for us on a Unix
198   *          command line.
199   */
200  public static String getUnixForm(final String argument)
201  {
202    ensureNotNull(argument);
203
204    final QuotingRequirements requirements = getRequiredUnixQuoting(argument);
205
206    String quotedArgument = argument;
207    if (requirements.requiresSingleQuotesOnUnix())
208    {
209      if (requirements.includesSingleQuote())
210      {
211        // On the primary Unix shells (e.g. bash), single-quote cannot be
212        // included in a single-quoted string.  So it has to be specified
213        // outside of the quoted part, and has to be included in "" itself.
214        quotedArgument = quotedArgument.replace("'", "'\"'\"'");
215      }
216      quotedArgument = "'" + quotedArgument + "'";
217    }
218    else if (requirements.requiresDoubleQuotesOnUnix())
219    {
220      quotedArgument = "\"" + quotedArgument + "\"";
221    }
222
223    return quotedArgument;
224  }
225
226
227
228  /**
229   * Return a clean form of the specified argument that can be used directly
230   * on a Windows command line.
231   *
232   * @param  argument  The raw argument to convert into a clean form that can
233   *                   be used directly on the Windows command line.
234   *
235   * @return  A form of the specified argument that is clean for us on a Windows
236   *          command line.
237   */
238  public static String getWindowsForm(final String argument)
239  {
240    ensureNotNull(argument);
241
242    final QuotingRequirements requirements = getRequiredUnixQuoting(argument);
243
244    String quotedArgument = argument;
245
246    // Windows only supports double-quotes.  They are treated much more like
247    // single-quotes on Unix.  Only " needs to be escaped, and it's done by
248    // repeating it, i.e. """"" gets passed into the program as just "
249    if (requirements.requiresSingleQuotesOnUnix() ||
250        requirements.requiresDoubleQuotesOnUnix())
251    {
252      if (requirements.includesDoubleQuote())
253      {
254        quotedArgument = quotedArgument.replace("\"", "\"\"");
255      }
256      quotedArgument = "\"" + quotedArgument + "\"";
257    }
258
259    return quotedArgument;
260  }
261
262
263
264
265  /**
266   * Return a list of raw parameters that were parsed from the specified String.
267   * This can be used to undo the quoting that was done by
268   * {@link #getCleanArgument}.  It perfectly handles any String that was
269   * passed into this method, but it won't behave exactly as any single shell
270   * behaves because they aren't consistent.  For instance, it will never
271   * treat \\ as an escape character.
272   *
273   * @param  exampleCommandLine  The command line to parse.
274   *
275   * @return  A list of raw arguments that were parsed from the specified
276   *          example usage command line.
277   */
278  public static List<String> parseExampleCommandLine(
279                                 final String exampleCommandLine)
280  {
281    ensureNotNull(exampleCommandLine);
282
283    boolean inDoubleQuote = false;
284    boolean inSingleQuote = false;
285
286    List<String> args = new ArrayList<String>();
287
288    StringBuilder currentArg = new StringBuilder();
289    boolean inArg = false;
290    for (int i = 0; i < exampleCommandLine.length(); i++) {
291      Character c = exampleCommandLine.charAt(i);
292
293      Character nextChar = null;
294      if (i < (exampleCommandLine.length() - 1))
295      {
296        nextChar = exampleCommandLine.charAt(i + 1);
297      }
298
299      if (inDoubleQuote)
300      {
301        if (c == '"')
302        {
303          if ((nextChar != null) && (nextChar == '"'))
304          {
305            // Handle the special case on Windows where a " is escaped inside
306            // of double-quotes using "", i.e. to get " passed into the program,
307            // """" must be specified.
308            currentArg.append('\"');
309            i++;
310          }
311          else
312          {
313            inDoubleQuote = false;
314          }
315        }
316        else
317        {
318          currentArg.append(c);
319        }
320      }
321      else if (inSingleQuote)
322      {
323        if (c == '\'')
324        {
325          inSingleQuote = false;
326        }
327        else
328        {
329          currentArg.append(c);
330        }
331      }
332      else if (c == '"')
333      {
334        inDoubleQuote = true;
335        inArg = true;
336      }
337      else if (c == '\'')
338      {
339        inSingleQuote = true;
340        inArg = true;
341      }
342      else if ((c == ' ') || (c == '\t'))
343      {
344        if (inArg)
345        {
346          args.add(currentArg.toString());
347          currentArg = new StringBuilder();
348          inArg = false;
349        }
350      }
351      else
352      {
353        currentArg.append(c);
354        inArg = true;
355      }
356    }
357
358    if (inArg)
359    {
360      args.add(currentArg.toString());
361    }
362
363    return args;
364  }
365
366
367
368  /**
369   * Examines the specified argument to determine how it will need to be
370   * quoted.
371   *
372   * @param  argument  The argument to examine.
373   *
374   * @return  The QuotingRequirements for the specified argument.
375   */
376  private static QuotingRequirements getRequiredUnixQuoting(
377                                         final String argument)
378  {
379    boolean requiresDoubleQuotes = false;
380    boolean requiresSingleQuotes = false;
381    boolean includesDoubleQuote = false;
382    boolean includesSingleQuote = false;
383
384    if (argument.length() == 0)
385    {
386      requiresDoubleQuotes = true;
387    }
388
389    for (int i=0; i < argument.length(); i++)
390    {
391      final char c = argument.charAt(i);
392      switch (c)
393      {
394        case '"':
395          includesDoubleQuote = true;
396          requiresSingleQuotes = true;
397          break;
398        case '\\':
399        case '!':
400        case '`':
401        case '$':
402        case '@':
403        case '*':
404          requiresSingleQuotes = true;
405          break;
406
407        case '\'':
408          includesSingleQuote = true;
409          requiresDoubleQuotes = true;
410          break;
411        case ' ':
412        case '|':
413        case '&':
414        case ';':
415        case '(':
416        case ')':
417        case '<':
418        case '>':
419          requiresDoubleQuotes = true;
420          break;
421
422        case ',':
423        case '=':
424        case '-':
425        case '_':
426        case ':':
427        case '.':
428        case '/':
429          // These are safe, so just ignore them.
430          break;
431
432        default:
433          if (((c >= 'a') && (c <= 'z')) ||
434              ((c >= 'A') && (c <= 'Z')) ||
435              ((c >= '0') && (c <= '9')))
436          {
437            // These are safe, so just ignore them.
438          }
439          else
440          {
441            requiresDoubleQuotes = true;
442          }
443      }
444    }
445
446    if (requiresSingleQuotes)
447    {
448      // Single-quoting trumps double-quotes.
449      requiresDoubleQuotes = false;
450    }
451
452    return new QuotingRequirements(requiresSingleQuotes, requiresDoubleQuotes,
453                                   includesSingleQuote, includesDoubleQuote);
454  }
455}