1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Base classes for storage interfaces.
22
23 @organization: Zuza Software Foundation
24 @copyright: 2006-2009 Zuza Software Foundation
25 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
26 """
27
28 import logging
29 try:
30 import cPickle as pickle
31 except ImportError:
32 import pickle
33 from exceptions import NotImplementedError
34
35 import translate.i18n
36 from translate.misc.multistring import multistring
37 from translate.misc.typecheck import accepts, Self, IsOneOf
38 from translate.storage.placeables import StringElem, general, parse as rich_parse
39 from translate.storage.workflow import StateEnum as states
40
41
43 """Forces derived classes to override method."""
44
45 if type(method.im_self) == type(baseclass):
46
47 actualclass = method.im_self
48 else:
49 actualclass = method.im_class
50 if actualclass != baseclass:
51 raise NotImplementedError(
52 "%s does not reimplement %s as required by %s" % \
53 (actualclass.__name__, method.__name__, baseclass.__name__))
54
55
57
59 self.inner_exc = inner_exc
60
62 return repr(self.inner_exc)
63
64
66 """Base class for translation units.
67
68 Our concept of a I{translation unit} is influenced heavily by XLIFF:
69 U{http://www.oasis-open.org/committees/xliff/documents/xliff-specification.htm}
70
71 As such, most of the method and variable names borrow from XLIFF
72 terminology.
73
74 A translation unit consists of the following:
75 - A I{source} string. This is the original translatable text.
76 - A I{target} string. This is the translation of the I{source}.
77 - Zero or more I{notes} on the unit. Notes would typically be some
78 comments from a translator on the unit, or some comments originating
79 from the source code.
80 - Zero or more I{locations}. Locations indicate where in the original
81 source code this unit came from.
82 - Zero or more I{errors}. Some tools (eg. L{pofilter<filters.pofilter>})
83 can run checks on translations and produce error messages.
84
85 @group Source: *source*
86 @group Target: *target*
87 @group Notes: *note*
88 @group Locations: *location*
89 @group Errors: *error*
90 """
91
92 rich_parsers = []
93 """A list of functions to use for parsing a string into a rich string
94 tree."""
95
96
97 S_OBSOLETE = states.OBSOLETE
98 S_EMPTY = states.EMPTY
99 S_NEEDS_WORK = states.NEEDS_WORK
100 S_REJECTED = states.REJECTED
101 S_NEEDS_REVIEW = states.NEEDS_REVIEW
102 S_UNREVIEWED = states.UNREVIEWED
103 S_FINAL = states.FINAL
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127 STATE = {}
128
129 _store = None
130 _source = None
131 _target = None
132 _rich_source = None
133 _rich_target = None
134 _state_n = 0
135 notes = ""
136
138 """Constructs a TranslationUnit containing the given source string."""
139 if source is not None:
140 self.source = source
141
143 """Compares two TranslationUnits.
144
145 @type other: L{TranslationUnit}
146 @param other: Another L{TranslationUnit}
147 @rtype: Boolean
148 @return: Returns True if the supplied TranslationUnit equals this unit.
149 """
150 return self.source == other.source and self.target == other.target
151
153 """Converts to a string representation that can be parsed back using
154 L{parsestring()}."""
155
156 store = getattr(self, "_store", None)
157 self._store = None
158 dump = pickle.dumps(self)
159 self._store = store
160 return dump
161
163 """Convert a "rich" string tree to a C{multistring}:
164
165 >>> from translate.storage.placeables.interfaces import X
166 >>> rich = [StringElem(['foo', X(id='xxx', sub=[' ']), 'bar'])]
167 >>> TranslationUnit.rich_to_multistring(rich)
168 multistring(u'foo bar')
169 """
170 return multistring([unicode(elem) for elem in elem_list])
171 rich_to_multistring = classmethod(rich_to_multistring)
172
174 """Convert a multistring to a list of "rich" string trees:
175
176 >>> target = multistring([u'foo', u'bar', u'baz'])
177 >>> TranslationUnit.multistring_to_rich(target)
178 [<StringElem([<StringElem([u'foo'])>])>,
179 <StringElem([<StringElem([u'bar'])>])>,
180 <StringElem([<StringElem([u'baz'])>])>]
181 """
182 if isinstance(mulstring, multistring):
183 return [rich_parse(s, self.rich_parsers) for s in mulstring.strings]
184 return [rich_parse(mulstring, self.rich_parsers)]
185
190 source = property(lambda self: self._source, setsource)
191
196 target = property(lambda self: self._target, settarget)
197
202
204 if not hasattr(value, '__iter__'):
205 raise ValueError('value must be iterable')
206 if len(value) < 1:
207 raise ValueError('value must have at least one element.')
208 if not isinstance(value[0], StringElem):
209 raise ValueError('value[0] must be of type StringElem.')
210 self._rich_source = list(value)
211 multi = self.rich_to_multistring(value)
212 if self.source != multi:
213 self.source = multi
214 rich_source = property(_get_rich_source, _set_rich_source)
215 """ @see: rich_to_multistring
216 @see: multistring_to_rich"""
217
222
224 if not hasattr(value, '__iter__'):
225 raise ValueError('value must be iterable')
226 if len(value) < 1:
227 raise ValueError('value must have at least one element.')
228 if not isinstance(value[0], StringElem):
229 raise ValueError('value[0] must be of type StringElem.')
230 self._rich_target = list(value)
231 self.target = self.rich_to_multistring(value)
232 rich_target = property(_get_rich_target, _set_rich_target)
233 """ @see: rich_to_multistring
234 @see: multistring_to_rich"""
235
237 """Returns the length of the target string.
238
239 @note: Plural forms might be combined.
240 @rtype: Integer
241 """
242 length = len(self.target or "")
243 strings = getattr(self.target, "strings", [])
244 if strings:
245 length += sum([len(pluralform) for pluralform in strings[1:]])
246 return length
247
249 """A unique identifier for this unit.
250
251 @rtype: string
252 @return: an identifier for this unit that is unique in the store
253
254 Derived classes should override this in a way that guarantees a unique
255 identifier for each unit in the store.
256 """
257 return self.source
258
260 """Sets the unique identifier for this unit.
261
262 Only implemented if the format allows ids independent from other
263 unit properties like source or context."""
264 pass
265
267 """A list of source code locations.
268
269 @note: Shouldn't be implemented if the format doesn't support it.
270 @rtype: List
271 """
272 return []
273
275 """Add one location to the list of locations.
276
277 @note: Shouldn't be implemented if the format doesn't support it.
278 """
279 pass
280
282 """Add a location or a list of locations.
283
284 @note: Most classes shouldn't need to implement this,
285 but should rather implement L{addlocation()}.
286 @warning: This method might be removed in future.
287 """
288 if isinstance(location, list):
289 for item in location:
290 self.addlocation(item)
291 else:
292 self.addlocation(location)
293
def getcontext(self):
    """Return the message context of this unit.

    This base implementation stores no context and always answers with an
    empty string.

    @rtype: string
    @return: The empty string.
    """
    no_context = ""
    return no_context
297
def setcontext(self, context):
    """Set the message context.

    This base implementation ignores the value and does nothing.

    @param context: The context to associate with this unit (unused here).
    """
    return None
301
303 """Returns all notes about this unit.
304
305 It will probably be freeform text or something reasonable that can be
306 synthesised by the format.
307 It should not include location comments (see L{getlocations()}).
308 """
309 return getattr(self, "notes", "")
310
def addnote(self, text, origin=None, position="append"):
    """Attach a note (comment) to this unit.

    @type text: string
    @param text: Usually just a sentence or two.
    @type origin: string
    @param origin: Specifies who/where the comment comes from.
        Origin can be one of the following text strings:
            - 'translator'
            - 'developer', 'programmer', 'source code' (synonyms)
        It is not used by this base implementation.
    @type position: string
    @param position: With "append" (the default) the text is added on a new
        line after any existing notes; any other value replaces the notes.
    """
    # Replace outright when asked to, or when there is nothing to append to.
    if position != "append" or not getattr(self, "notes", None):
        self.notes = text
        return
    self.notes += '\n' + text
326
328 """Remove all the translator's notes."""
329 self.notes = u''
330
def adderror(self, errorname, errortext):
    """Record an error message against this unit.

    This base implementation discards the error and does nothing.

    @type errorname: string
    @param errorname: A single word to id the error.
    @type errortext: string
    @param errortext: The text describing the error.
    """
    return None
340
342 """Get all error messages.
343
344 @rtype: Dictionary
345 """
346 return {}
347
349 """Marks the unit to indicate whether it needs review.
350
351 @keyword needsreview: Defaults to True.
352 @keyword explanation: Adds an optional explanation as a note.
353 """
354 pass
355
357 """Indicates whether this unit is translated.
358
359 This should be used rather than deducing it from .target,
360 to ensure that other classes can implement more functionality
361 (as XLIFF does).
362 """
363 return bool(self.target) and not self.isfuzzy()
364
366 """Indicates whether this unit can be translated.
367
368 This should be used to distinguish real units for translation from
369 header, obsolete, binary or other blank units.
370 """
371 return bool(self.source)
372
374 """Indicates whether this unit is fuzzy."""
375 return False
376
378 """Marks the unit as fuzzy or not."""
379 pass
380
382 """indicate whether a unit is obsolete"""
383 return False
384
386 """Make a unit obsolete"""
387 pass
388
390 """Indicates whether this unit is a header."""
391 return False
392
394 """Indicates whether this unit needs review."""
395 return False
396
398 """Used to see if this unit has no source or target string.
399
400 @note: This is probably used more to find translatable units,
401 and we might want to move in that direction rather and get rid of this.
402 """
403 return not (self.source or self.target)
404
406 """Tells whether or not this specific unit has plural strings."""
407
408 return False
409
412
415
416 - def merge(self, otherunit, overwrite=False, comments=True,
417 authoritative=False):
421
423 """Iterator that only returns this unit."""
424 yield self
425
427 """This unit in a list."""
428 return [self]
429
445 buildfromunit = classmethod(buildfromunit)
446
447 xid = property(lambda self: None, lambda self, value: None)
448 rid = property(lambda self: None, lambda self, value: None)
449
451 if n is None:
452 n = self.get_state_n()
453 for state_id, state_range in self.STATE.iteritems():
454 if state_range[0] <= n < state_range[1]:
455 return state_id
456 if self.STATE:
457 raise ValueError('No state containing value %s' % (n))
458 else:
459 return n
460
466
469
471 """Empty method that should be overridden in sub-classes to infer the
472 current state(_n) of the unit from its current state."""
473 pass
474
475
477 """Base class for stores for multiple translation units of type
478 UnitClass."""
479
480 UnitClass = TranslationUnit
481 """The class of units that will be instantiated and used by this class"""
482 Name = "Base translation store"
483 """The human usable name of this store type"""
484 Mimetypes = None
485 """A list of MIME types associated with this store type"""
486 Extensions = None
487 """A list of file extensions associated with this store type"""
488 _binary = False
489 """Indicates whether a file should be accessed as a binary file."""
490 suggestions_in_format = False
491 """Indicates if format can store suggestions and alternative translation
492 for a unit"""
493
494 sourcelanguage = None
495 targetlanguage = None
496
498 """Constructs a blank TranslationStore."""
499 self.units = []
500 if unitclass:
501 self.UnitClass = unitclass
502
504 """Gets the source language for this store"""
505 return self.sourcelanguage
506
510
512 """Gets the target language for this store"""
513 return self.targetlanguage
514
518
520 """Gets the project type for this store"""
521 return getattr(self, '_project_style', None)
522
524 """Sets the project type for this store"""
525 self._project_style = project_style
526
528 """Iterator over all the units in this store."""
529 for unit in self.units:
530 yield unit
531
533 """Return a list of all units in this store."""
534 return [unit for unit in self.unit_iter()]
535
537 """Appends the given unit to the object's list of units.
538
539 This method should always be used rather than trying to modify the
540 list manually.
541
542 @type unit: L{TranslationUnit}
543 @param unit: The unit that will be added.
544 """
545 unit._store = self
546 self.units.append(unit)
547
549 """Adds and returns a new unit with the given source string.
550
551 @rtype: L{TranslationUnit}
552 """
553 unit = self.UnitClass(source)
554 self.addunit(unit)
555 return unit
556
558 """find unit with matching id by checking id_index"""
559 self.require_index()
560 return self.id_index.get(id, None)
561
563 """Finds the unit with the given source string.
564
565 @rtype: L{TranslationUnit} or None
566 """
567 if len(getattr(self, "sourceindex", [])):
568 if source in self.sourceindex:
569 return self.sourceindex[source][0]
570 else:
571 for unit in self.units:
572 if unit.source == source:
573 return unit
574 return None
575
577 """Finds the units with the given source string.
578
579 @rtype: List of L{TranslationUnit} or None
580 """
581 if len(getattr(self, "sourceindex", [])):
582 if source in self.sourceindex:
583 return self.sourceindex[source]
584 else:
585
586
587 result = []
588 for unit in self.units:
589 if unit.source == source:
590 result.append(unit)
591 return result
592 return None
593
595 """Returns the translated string for a given source string.
596
597 @rtype: String or None
598 """
599 unit = self.findunit(source)
600 if unit and unit.target:
601 return unit.target
602 else:
603 return None
604
606 """Remove a unit from source and location indexes"""
607
608 def remove_unit(source):
609 if source in self.sourceindex:
610 try:
611 self.sourceindex[source].remove(unit)
612 if len(self.sourceindex[source]) == 0:
613 del(self.sourceindex[source])
614 except ValueError:
615 pass
616
617 if unit.hasplural():
618 for source in unit.source.strings:
619 remove_unit(source)
620 else:
621 remove_unit(unit.source)
622
623 for location in unit.getlocations():
624 if location in self.locationindex \
625 and self.locationindex[location] is not None \
626 and self.locationindex[location] == unit:
627 del(self.locationindex[location])
628
630 """Add a unit to source and location indexes"""
631 self.id_index[unit.getid()] = unit
632
633 def insert_unit(source):
634 if not source in self.sourceindex:
635 self.sourceindex[source] = [unit]
636 else:
637 self.sourceindex[source].append(unit)
638
639 if unit.hasplural():
640 for source in unit.source.strings:
641 insert_unit(source)
642 else:
643 insert_unit(unit.source)
644
645 for location in unit.getlocations():
646 if location in self.locationindex:
647
648
649 self.locationindex[location] = None
650 else:
651 self.locationindex[location] = unit
652
654 """Indexes the items in this store. At least .sourceindex should be
655 useful."""
656 self.locationindex = {}
657 self.sourceindex = {}
658 self.id_index = {}
659 for index, unit in enumerate(self.units):
660 unit.index = index
661 if not (unit.isheader() or unit.isblank()):
662 self.add_unit_to_index(unit)
663
665 """make sure source index exists"""
666 if not hasattr(self, "id_index"):
667 self.makeindex()
668
def getids(self, filename=None):
    """Return the ids of the indexed units.

    Calls C{require_index()} first, then returns the keys of C{id_index}.

    @param filename: Not used by this implementation.
    @return: The keys of C{self.id_index}.
    """
    self.require_index()
    id_index = self.id_index
    return id_index.keys()
673
675 odict = self.__dict__.copy()
676 odict['fileobj'] = None
677 return odict
678
680 self.__dict__.update(dict)
681 if getattr(self, "filename", False):
682 self.fileobj = open(self.filename)
683
685 """Converts to a string representation that can be parsed back using
686 L{parsestring()}."""
687
688 fileobj = getattr(self, "fileobj", None)
689 self.fileobj = None
690 dump = pickle.dumps(self)
691 self.fileobj = fileobj
692 return dump
693
695 """Returns True if the object doesn't contain any translation units."""
696 if len(self.units) == 0:
697 return True
698 for unit in self.units:
699 if unit.istranslatable():
700 return False
701 return True
702
704 """Tries to work out what the name of the filesystem file is and
705 assigns it to .filename."""
706 fileobj = getattr(self, "fileobj", None)
707 if fileobj:
708 filename = getattr(fileobj, "name",
709 getattr(fileobj, "filename", None))
710 if filename:
711 self.filename = filename
712
714 """Converts the string representation back to an object."""
715 newstore = cls()
716 if storestring:
717 newstore.parse(storestring)
718 return newstore
719 parsestring = classmethod(parsestring)
720
722 if not default_encodings:
723 default_encodings = ['utf-8']
724 try:
725 import chardet
726
727 detected_encoding = chardet.detect(text.replace("…", ""))
728 if detected_encoding['confidence'] < 0.48:
729 detected_encoding = None
730 elif detected_encoding['encoding'] == 'ascii':
731 detected_encoding['encoding'] = 'utf-8'
732 except ImportError:
733 detected_encoding = None
734
735 encodings = []
736 if self.encoding == 'auto':
737 if detected_encoding and detected_encoding['encoding'] not in encodings:
738 encodings.append(detected_encoding['encoding'])
739 for encoding in default_encodings:
740 if encoding not in encodings:
741 encodings.append(encoding)
742 else:
743 encodings.append(self.encoding)
744 if detected_encoding and detected_encoding['encoding'] != self.encoding:
745 logging.warn("trying to parse %s with encoding: %s but detected encoding is %s (confidence: %s)",
746 self.filename, self.encoding, detected_encoding['encoding'], detected_encoding['confidence'])
747 encodings.append(self.encoding)
748
749 for encoding in encodings:
750 try:
751 r_text = unicode(text, encoding)
752 r_encoding = encoding
753 break
754 except UnicodeDecodeError:
755 r_text = None
756 r_encoding = None
757 if r_encoding == 'ascii':
758 r_encoding = 'utf-8'
759 return r_text, r_encoding
760
762 """parser to process the given source string"""
763 self.units = pickle.loads(data).units
764
766 """Writes the string representation to the given file (or filename)."""
767 storestring = str(self)
768 if isinstance(storefile, basestring):
769 mode = 'w'
770 if self._binary:
771 mode = 'wb'
772 storefile = open(storefile, mode)
773 self.fileobj = storefile
774 self._assignname()
775 storefile.write(storestring)
776 storefile.close()
777
779 """Save to the file that data was originally read from, if
780 available."""
781 fileobj = getattr(self, "fileobj", None)
782 mode = 'w'
783 if self._binary:
784 mode = 'wb'
785 if not fileobj:
786 filename = getattr(self, "filename", None)
787 if filename:
788 fileobj = file(filename, mode)
789 else:
790 fileobj.close()
791 filename = getattr(fileobj, "name",
792 getattr(fileobj, "filename", None))
793 if not filename:
794 raise ValueError("No file or filename to save to")
795 fileobj = fileobj.__class__(filename, mode)
796 self.savefile(fileobj)
797
799 """Reads the given file (or opens the given filename) and parses back
800 to an object."""
801 mode = 'r'
802 if cls._binary:
803 mode = 'rb'
804 if isinstance(storefile, basestring):
805 storefile = open(storefile, mode)
806 mode = getattr(storefile, "mode", mode)
807
808 if mode == 1 or "r" in mode:
809 storestring = storefile.read()
810 storefile.close()
811 else:
812 storestring = ""
813 newstore = cls.parsestring(storestring)
814 newstore.fileobj = storefile
815 newstore._assignname()
816 return newstore
817 parsefile = classmethod(parsefile)
818