001/*
002 * Copyright 2007-2014 UnboundID Corp.
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2008-2014 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.ldap.matchingrules;
022
023
024
025import com.unboundid.asn1.ASN1OctetString;
026import com.unboundid.util.ThreadSafety;
027import com.unboundid.util.ThreadSafetyLevel;
028
029import static com.unboundid.util.StaticUtils.*;
030
031
032
033/**
034 * This class provides an implementation of a matching rule that uses
035 * case-insensitive matching that also treats multiple consecutive (non-escaped)
036 * spaces as a single space.
037 */
038@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
039public final class CaseIgnoreStringMatchingRule
040       extends AcceptAllSimpleMatchingRule
041{
042  /**
043   * The singleton instance that will be returned from the {@code getInstance}
044   * method.
045   */
046  private static final CaseIgnoreStringMatchingRule INSTANCE =
047       new CaseIgnoreStringMatchingRule();
048
049
050
051  /**
052   * The name for the caseIgnoreMatch equality matching rule.
053   */
054  public static final String EQUALITY_RULE_NAME = "caseIgnoreMatch";
055
056
057
058  /**
059   * The name for the caseIgnoreMatch equality matching rule, formatted in all
060   * lowercase characters.
061   */
062  static final String LOWER_EQUALITY_RULE_NAME =
063       toLowerCase(EQUALITY_RULE_NAME);
064
065
066
067  /**
068   * The OID for the caseIgnoreMatch equality matching rule.
069   */
070  public static final String EQUALITY_RULE_OID = "2.5.13.2";
071
072
073
074  /**
075   * The name for the caseIgnoreOrderingMatch ordering matching rule.
076   */
077  public static final String ORDERING_RULE_NAME = "caseIgnoreOrderingMatch";
078
079
080
081  /**
082   * The name for the caseIgnoreOrderingMatch ordering matching rule, formatted
083   * in all lowercase characters.
084   */
085  static final String LOWER_ORDERING_RULE_NAME =
086       toLowerCase(ORDERING_RULE_NAME);
087
088
089
090  /**
091   * The OID for the caseIgnoreOrderingMatch ordering matching rule.
092   */
093  public static final String ORDERING_RULE_OID = "2.5.13.3";
094
095
096
097  /**
098   * The name for the caseIgnoreSubstringsMatch substring matching rule.
099   */
100  public static final String SUBSTRING_RULE_NAME = "caseIgnoreSubstringsMatch";
101
102
103
104  /**
105   * The name for the caseIgnoreSubstringsMatch substring matching rule,
106   * formatted in all lowercase characters.
107   */
108  static final String LOWER_SUBSTRING_RULE_NAME =
109       toLowerCase(SUBSTRING_RULE_NAME);
110
111
112
113  /**
114   * The OID for the caseIgnoreSubstringsMatch substring matching rule.
115   */
116  public static final String SUBSTRING_RULE_OID = "2.5.13.4";
117
118
119
120  /**
121   * The serial version UID for this serializable class.
122   */
123  private static final long serialVersionUID = -1293370922676445525L;
124
125
126
127  /**
128   * Creates a new instance of this case ignore string matching rule.
129   */
130  public CaseIgnoreStringMatchingRule()
131  {
132    // No implementation is required.
133  }
134
135
136
137  /**
138   * Retrieves a singleton instance of this matching rule.
139   *
140   * @return  A singleton instance of this matching rule.
141   */
142  public static CaseIgnoreStringMatchingRule getInstance()
143  {
144    return INSTANCE;
145  }
146
147
148
149  /**
150   * {@inheritDoc}
151   */
152  @Override()
153  public String getEqualityMatchingRuleName()
154  {
155    return EQUALITY_RULE_NAME;
156  }
157
158
159
160  /**
161   * {@inheritDoc}
162   */
163  @Override()
164  public String getEqualityMatchingRuleOID()
165  {
166    return EQUALITY_RULE_OID;
167  }
168
169
170
171  /**
172   * {@inheritDoc}
173   */
174  @Override()
175  public String getOrderingMatchingRuleName()
176  {
177    return ORDERING_RULE_NAME;
178  }
179
180
181
182  /**
183   * {@inheritDoc}
184   */
185  @Override()
186  public String getOrderingMatchingRuleOID()
187  {
188    return ORDERING_RULE_OID;
189  }
190
191
192
193  /**
194   * {@inheritDoc}
195   */
196  @Override()
197  public String getSubstringMatchingRuleName()
198  {
199    return SUBSTRING_RULE_NAME;
200  }
201
202
203
204  /**
205   * {@inheritDoc}
206   */
207  @Override()
208  public String getSubstringMatchingRuleOID()
209  {
210    return SUBSTRING_RULE_OID;
211  }
212
213
214
215  /**
216   * {@inheritDoc}
217   */
218  @Override()
219  public boolean valuesMatch(final ASN1OctetString value1,
220                             final ASN1OctetString value2)
221  {
222    // Try to use a quick, no-copy determination if possible.  If this fails,
223    // then we'll fall back on a more thorough, but more costly, approach.
224    final byte[] value1Bytes = value1.getValue();
225    final byte[] value2Bytes = value2.getValue();
226    if (value1Bytes.length == value2Bytes.length)
227    {
228      for (int i=0; i< value1Bytes.length; i++)
229      {
230        final byte b1 = value1Bytes[i];
231        final byte b2 = value2Bytes[i];
232
233        if (((b1 & 0x7F) != (b1 & 0xFF)) ||
234            ((b2 & 0x7F) != (b2 & 0xFF)))
235        {
236          return normalize(value1).equals(normalize(value2));
237        }
238        else if (b1 != b2)
239        {
240          if ((b1 == ' ') || (b2 == ' '))
241          {
242            return normalize(value1).equals(normalize(value2));
243          }
244          else if (Character.isUpperCase((char) b1))
245          {
246            final char c = Character.toLowerCase((char) b1);
247            if (c != ((char) b2))
248            {
249              return false;
250            }
251          }
252          else if (Character.isUpperCase((char) b2))
253          {
254            final char c = Character.toLowerCase((char) b2);
255            if (c != ((char) b1))
256            {
257              return false;
258            }
259          }
260          else
261          {
262            return false;
263          }
264        }
265      }
266
267      // If we've gotten to this point, then the values must be equal.
268      return true;
269    }
270    else
271    {
272      return normalizeInternal(value1, false, (byte) 0x00).equals(
273                  normalizeInternal(value2, false, (byte) 0x00));
274    }
275  }
276
277
278
279  /**
280   * {@inheritDoc}
281   */
282  @Override()
283  public ASN1OctetString normalize(final ASN1OctetString value)
284  {
285    return normalizeInternal(value, false, (byte) 0x00);
286  }
287
288
289
290  /**
291   * {@inheritDoc}
292   */
293  @Override()
294  public ASN1OctetString normalizeSubstring(final ASN1OctetString value,
295                                            final byte substringType)
296  {
297    return normalizeInternal(value, true, substringType);
298  }
299
300
301
302  /**
303   * Normalizes the provided value for use in either an equality or substring
304   * matching operation.
305   *
306   * @param  value          The value to be normalized.
307   * @param  isSubstring    Indicates whether the value should be normalized as
308   *                        part of a substring assertion rather than an
309   *                        equality assertion.
310   * @param  substringType  The substring type for the element, if it is to be
311   *                        part of a substring assertion.
312   *
313   * @return  The appropriately normalized form of the provided value.
314   */
315  private static ASN1OctetString normalizeInternal(final ASN1OctetString value,
316                                                   final boolean isSubstring,
317                                                   final byte substringType)
318  {
319    final byte[] valueBytes = value.getValue();
320    if (valueBytes.length == 0)
321    {
322      return value;
323    }
324
325    final boolean trimInitial;
326    final boolean trimFinal;
327    if (isSubstring)
328    {
329      switch (substringType)
330      {
331        case SUBSTRING_TYPE_SUBINITIAL:
332          trimInitial = true;
333          trimFinal   = false;
334          break;
335
336        case SUBSTRING_TYPE_SUBFINAL:
337          trimInitial = false;
338          trimFinal   = true;
339          break;
340
341        default:
342          trimInitial = false;
343          trimFinal   = false;
344          break;
345      }
346    }
347    else
348    {
349      trimInitial = true;
350      trimFinal   = true;
351    }
352
353    // Count the number of duplicate spaces in the value, and determine whether
354    // there are any non-space characters.  Also, see if there are any non-ASCII
355    // characters.
356    boolean containsNonSpace = false;
357    boolean lastWasSpace = trimInitial;
358    int numDuplicates = 0;
359    for (final byte b : valueBytes)
360    {
361      if ((b & 0x7F) != (b & 0xFF))
362      {
363        return normalizeNonASCII(value, trimInitial, trimFinal);
364      }
365
366      if (b == ' ')
367      {
368        if (lastWasSpace)
369        {
370          numDuplicates++;
371        }
372        else
373        {
374          lastWasSpace = true;
375        }
376      }
377      else
378      {
379        containsNonSpace = true;
380        lastWasSpace = false;
381      }
382    }
383
384    if (! containsNonSpace)
385    {
386      return new ASN1OctetString(" ");
387    }
388
389    if (lastWasSpace && trimFinal)
390    {
391      numDuplicates++;
392    }
393
394
395    // Create a new byte array to hold the normalized value.
396    lastWasSpace = trimInitial;
397    int targetPos = 0;
398    final byte[] normalizedBytes = new byte[valueBytes.length - numDuplicates];
399    for (int i=0; i < valueBytes.length; i++)
400    {
401      switch (valueBytes[i])
402      {
403        case ' ':
404          if (lastWasSpace || (trimFinal && (i == (valueBytes.length - 1))))
405          {
406            // No action is required.
407          }
408          else
409          {
410            // This condition is needed to handle the special case in which
411            // there are multiple spaces at the end of the value.
412            if (targetPos < normalizedBytes.length)
413            {
414              normalizedBytes[targetPos++] = ' ';
415              lastWasSpace = true;
416            }
417          }
418
419          break;
420        case 'A':
421          normalizedBytes[targetPos++] = 'a';
422          lastWasSpace = false;
423          break;
424        case 'B':
425          normalizedBytes[targetPos++] = 'b';
426          lastWasSpace = false;
427          break;
428        case 'C':
429          normalizedBytes[targetPos++] = 'c';
430          lastWasSpace = false;
431          break;
432        case 'D':
433          normalizedBytes[targetPos++] = 'd';
434          lastWasSpace = false;
435          break;
436        case 'E':
437          normalizedBytes[targetPos++] = 'e';
438          lastWasSpace = false;
439          break;
440        case 'F':
441          normalizedBytes[targetPos++] = 'f';
442          lastWasSpace = false;
443          break;
444        case 'G':
445          normalizedBytes[targetPos++] = 'g';
446          lastWasSpace = false;
447          break;
448        case 'H':
449          normalizedBytes[targetPos++] = 'h';
450          lastWasSpace = false;
451          break;
452        case 'I':
453          normalizedBytes[targetPos++] = 'i';
454          lastWasSpace = false;
455          break;
456        case 'J':
457          normalizedBytes[targetPos++] = 'j';
458          lastWasSpace = false;
459          break;
460        case 'K':
461          normalizedBytes[targetPos++] = 'k';
462          lastWasSpace = false;
463          break;
464        case 'L':
465          normalizedBytes[targetPos++] = 'l';
466          lastWasSpace = false;
467          break;
468        case 'M':
469          normalizedBytes[targetPos++] = 'm';
470          lastWasSpace = false;
471          break;
472        case 'N':
473          normalizedBytes[targetPos++] = 'n';
474          lastWasSpace = false;
475          break;
476        case 'O':
477          normalizedBytes[targetPos++] = 'o';
478          lastWasSpace = false;
479          break;
480        case 'P':
481          normalizedBytes[targetPos++] = 'p';
482          lastWasSpace = false;
483          break;
484        case 'Q':
485          normalizedBytes[targetPos++] = 'q';
486          lastWasSpace = false;
487          break;
488        case 'R':
489          normalizedBytes[targetPos++] = 'r';
490          lastWasSpace = false;
491          break;
492        case 'S':
493          normalizedBytes[targetPos++] = 's';
494          lastWasSpace = false;
495          break;
496        case 'T':
497          normalizedBytes[targetPos++] = 't';
498          lastWasSpace = false;
499          break;
500        case 'U':
501          normalizedBytes[targetPos++] = 'u';
502          lastWasSpace = false;
503          break;
504        case 'V':
505          normalizedBytes[targetPos++] = 'v';
506          lastWasSpace = false;
507          break;
508        case 'W':
509          normalizedBytes[targetPos++] = 'w';
510          lastWasSpace = false;
511          break;
512        case 'X':
513          normalizedBytes[targetPos++] = 'x';
514          lastWasSpace = false;
515          break;
516        case 'Y':
517          normalizedBytes[targetPos++] = 'y';
518          lastWasSpace = false;
519          break;
520        case 'Z':
521          normalizedBytes[targetPos++] = 'z';
522          lastWasSpace = false;
523          break;
524        default:
525          normalizedBytes[targetPos++] = valueBytes[i];
526          lastWasSpace = false;
527          break;
528      }
529    }
530
531
532    return new ASN1OctetString(normalizedBytes);
533  }
534
535
536
537  /**
538   * Normalizes the provided value a string representation, properly handling
539   * any non-ASCII characters.
540   *
541   * @param  value        The value to be normalized.
542   * @param  trimInitial  Indicates whether to trim off all leading spaces at
543   *                      the beginning of the value.
544   * @param  trimFinal    Indicates whether to trim off all trailing spaces at
545   *                      the end of the value.
546   *
547   * @return  The normalized form of the value.
548   */
549  private static ASN1OctetString normalizeNonASCII(final ASN1OctetString value,
550                                                   final boolean trimInitial,
551                                                   final boolean trimFinal)
552  {
553    final StringBuilder buffer = new StringBuilder(value.stringValue());
554
555    int pos = 0;
556    boolean lastWasSpace = trimInitial;
557    while (pos < buffer.length())
558    {
559      final char c = buffer.charAt(pos++);
560      if (c == ' ')
561      {
562        if (lastWasSpace || (trimFinal && (pos >= buffer.length())))
563        {
564          buffer.deleteCharAt(--pos);
565        }
566        else
567        {
568          lastWasSpace = true;
569        }
570      }
571      else
572      {
573        if (Character.isUpperCase(c))
574        {
575          buffer.setCharAt((pos-1), Character.toLowerCase(c));
576        }
577
578        lastWasSpace = false;
579      }
580    }
581
582    // It is possible that there could be an extra space at the end.  If that's
583    // the case, then remove it.
584    if (trimFinal && (buffer.length() > 0) &&
585        (buffer.charAt(buffer.length() - 1) == ' '))
586    {
587      buffer.deleteCharAt(buffer.length() - 1);
588    }
589
590    return new ASN1OctetString(buffer.toString());
591  }
592}