Package translate :: Package tools :: Module pretranslate
[hide private]
[frames | no frames]

Source Code for Module translate.tools.pretranslate

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Fill localization files with suggested translations based on 
 22  translation memory and existing translations. 
 23  """ 
 24   
 25  from translate.storage import factory 
 26  from translate.storage import xliff, po 
 27  from translate.search import match 
 28   
 29  # We don't want to reinitialise the TM each time, so let's store it here. 
 30  tmmatcher = None 
 31   
 32   
def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Return the shared translation-memory matcher, creating it on first use.

    The matcher is cached in the module-level ``tmmatcher``.  Once it has
    been built, every later call returns that same object and the arguments
    of the later call are not consulted.

    :param tmfiles: a single TM file, or a list of TM files; each one is
        loaded through ``factory.getobject``.
    :param max_candidates: forwarded to ``match.matcher``.
    :param min_similarity: forwarded to ``match.matcher``.
    :param max_length: forwarded to ``match.matcher``.
    :return: the cached matcher.
    """
    global tmmatcher
    # Already initialised by an earlier call: reuse it.
    if tmmatcher is not None:
        return tmmatcher

    if isinstance(tmfiles, list):
        stores = [factory.getobject(path) for path in tmfiles]
    else:
        stores = factory.getobject(tmfiles)

    tmmatcher = match.matcher(
        stores,
        max_candidates=max_candidates,
        min_similarity=min_similarity,
        max_length=max_length,
    )
    return tmmatcher
44 45
def pretranslate_file(input_file, output_file, template_file, tm=None, min_similarity=75, fuzzymatching=True):
    """Pretranslate one factory-supported file and write out the result.

    The input (and, when given, the template) is loaded with
    ``factory.getobject``, handed to ``pretranslate_store``, and the
    returned store is written to ``output_file`` as ``str(store)``.

    :param input_file: file to be pretranslated.
    :param output_file: object with a ``write`` method receiving the result.
    :param template_file: file holding old translations, or None.
    :param tm: passed through to ``pretranslate_store``.
    :param min_similarity: passed through to ``pretranslate_store``.
    :param fuzzymatching: passed through to ``pretranslate_store``.
    :return: always 1.
    """
    input_store = factory.getobject(input_file)
    if template_file is None:
        template_store = None
    else:
        template_store = factory.getobject(template_file)

    result = pretranslate_store(input_store, template_store, tm, min_similarity, fuzzymatching)
    output_file.write(str(result))
    return 1
56 57
def match_template_location(input_unit, template_store):
    """Return a template unit matched by location, or None.

    PO files produced by moz2po and oo2po use locations as unique string
    identifiers, so for those the location is looked up first and the
    candidate is accepted only when its source text is equal to the input
    unit's and it has a non-empty target.  For ordinary gettext files that
    strategy makes no sense; they are cheaply detected by a missing location
    list or a ':' in the first location, and are matched by unit id instead.

    :param input_unit: unit providing ``getlocations()`` and ``source``.
    :param template_store: store providing ``locationindex``.
    :return: the matching template unit, or None when nothing qualifies.
    """
    locs = input_unit.getlocations()
    looks_like_plain_gettext = not locs or ":" in locs[0]
    if looks_like_plain_gettext:
        return match_template_id(input_unit, template_store)

    for loc in locs:
        candidate = template_store.locationindex.get(loc, None)
        if candidate is None:
            continue
        if candidate.source == input_unit.source and candidate.gettargetlen() > 0:
            return candidate
    return None
74
def match_template_id(input_unit, template_store):
    """Return the template unit that has the same id as ``input_unit``.

    :param input_unit: unit providing ``getid()``.
    :param template_store: store providing ``findid()``.
    :return: whatever ``template_store.findid`` returns for that id.
    """
    return template_store.findid(input_unit.getid())
79
def match_source(input_unit, template_store):
    """Return the template unit whose source text matches ``input_unit``.

    Matching is done on the source string through
    ``template_store.findunit`` (not on the unit id).

    :param input_unit: unit providing ``source``.
    :param template_store: store providing ``findunit()``, or None when no
        template is available.
    :return: the result of ``template_store.findunit``, or None when there
        is no template store or the source is at most one character long.
    """
    # Callers may only have translation-memory matchers and no template;
    # there is nothing to search in that case.
    if template_store is None:
        return None

    # hack for weird mozilla single letter strings, we don't want to
    # match them by anything but locations
    if len(input_unit.source) > 1:
        return template_store.findunit(input_unit.source)
    return None
87
def match_fuzzy(input_unit, matchers):
    """Return the first candidate offered by the first matcher that has any.

    The matchers are queried in order with ``input_unit.source``; matchers
    after the first one that yields candidates are not queried.

    :param input_unit: unit providing ``source``.
    :param matchers: iterable of objects providing ``matches(source)``.
    :return: the first candidate found, or None when no matcher has one.
    """
    for engine in matchers:
        hits = engine.matches(input_unit.source)
        if not hits:
            continue
        return hits[0]
    return None
94 95
def pretranslate_unit(input_unit, template_store, matchers=None, mark_reused=False, match_locations=False):
    """Pretranslate a unit, or leave it unchanged if no translation is found.

    A match is first looked up in the template (by location or by id).  If
    that yields nothing with a target and matchers are supplied, an exact
    source match is tried, then the fuzzy matchers.

    :param input_unit: the unit to fill in; it is modified in place.
    :param template_store: store with old translations, or None.
    :param matchers: fuzzy matchers handed to ``match_fuzzy``, or None.
    :param mark_reused: when true, set ``reused = True`` on the template
        unit found for the matched source.
    :param match_locations: when true, template matching goes through
        ``match_template_location`` instead of ``match_template_id``.
    :return: ``input_unit``.
    """
    found = None

    # Template matching.
    if template_store:
        lookup = match_template_location if match_locations else match_template_id
        found = lookup(input_unit, template_store)

    if found and found.gettargetlen() > 0:
        input_unit.merge(found, authoritative=True)
    elif matchers:
        # Quick attempt: exact match on the source text.
        found = match_source(input_unit, template_store)

        # Nothing usable from the exact match, so fall back to fuzzy matching.
        if not (found and found.gettargetlen()):
            found = match_fuzzy(input_unit, matchers)

        if found and found.gettargetlen() > 0:
            #FIXME: should we dispatch here instead of this crude type check
            if isinstance(input_unit, xliff.xliffunit):
                #FIXME: what about origin, lang and matchquality
                input_unit.addalttrans(found.target, origin="fish", sourcetxt=found.source)
            else:
                input_unit.merge(found, authoritative=True)

    #FIXME: ugly hack required by pot2po to mark old
    #translations reused for new file.
    if mark_reused and found and template_store:
        original_unit = template_store.findunit(found.source)
        if original_unit is not None:
            original_unit.reused = True

    return input_unit
134
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """Do the actual pretranslation of a whole store.

    :param input_store: store whose translatable units get pretranslated.
    :param template_store: store with old translations, or None.
    :param tm: translation memory passed to ``memory()``; only used when
        ``fuzzymatching`` is true.
    :param min_similarity: minimum similarity given to the matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :return: ``input_store``, after each translatable unit has been passed
        through ``pretranslate_unit``.
    """
    matchers = []

    # Template preparation.
    if template_store is not None:
        template_store.makeindex()

        # Optional per-class hook: a module-level function named
        # prepare_template_<ClassName> is called if it exists.
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        module_globals = globals()
        if hook_name in module_globals:
            module_globals[hook_name](template_store)

        if fuzzymatching:
            #FIXME: max_length hardcoded
            template_matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True)
            template_matcher.addpercentage = False
            matchers.append(template_matcher)

    # Translation-memory matcher.
    if tm and fuzzymatching:
        #FIXME: max_length hardcoded
        tm_matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        tm_matcher.addpercentage = False
        matchers.append(tm_matcher)

    # Location matching is only enabled for PO files whose header carries
    # an X-Accelerator-Marker of '&' or '~'.
    match_locations = False
    if isinstance(input_store, po.pofile):
        marker = input_store.parseheader().get('X-Accelerator-Marker')
        match_locations = marker in ('&', '~')

    # Main loop.
    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        pretranslate_unit(unit, template_store, matchers, match_locations=match_locations)

    return input_store
169 170
def main(argv=None):
    """Run the pretranslate command-line tool.

    Registers the pot/po/xlf format pairs with ``pretranslate_file``, adds
    the ``--tm``, ``-s/--similarity`` and ``--nofuzzymatching`` options,
    and runs the conversion option parser on ``argv``.

    :param argv: argument list handed to ``parser.run``.
    """
    from translate.convert import convert

    # Every input extension is registered both on its own and paired with
    # its template extension; both map to the same output and handler.
    formats = {}
    for in_ext, out_ext in (("pot", "po"), ("po", "po"), ("xlf", "xlf")):
        handler = (out_ext, pretranslate_file)
        formats[in_ext] = handler
        formats[(in_ext, out_ext)] = handler

    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    parser.add_option(
        "", "--tm", dest="tm", default=None,
        help="The file to use as translation memory when fuzzy matching")
    parser.passthrough.append("tm")

    defaultsimilarity = 75
    similarity_help = "The minimum similarity for inclusion (default: %d%%)" % defaultsimilarity
    parser.add_option(
        "-s", "--similarity", dest="min_similarity", default=defaultsimilarity,
        type="float", help=similarity_help)
    parser.passthrough.append("min_similarity")

    parser.add_option(
        "--nofuzzymatching", dest="fuzzymatching", action="store_false",
        default=True, help="Disable fuzzy matching")
    parser.passthrough.append("fuzzymatching")

    parser.run(argv)
# Run the command-line tool when this module is executed as a script.
if __name__ == '__main__':
    main()