[IMP] account-export-csv - use generator with yield to avoid filling the worker memory with enormous lists of rows

pull/597/head
Yannick Vaucher 2013-09-13 11:01:19 +02:00 committed by David Beal
parent c59e40bab4
commit 0090799f3f
1 changed file with 36 additions and 28 deletions
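
In isolation, the change the commit message describes looks like this: the old code materialized every row in a Python list before returning, while a generator hands rows out one at a time so the CSV writer consumes them as they are produced. A minimal standalone sketch, not the module's code; `cursor` stands in for any iterable of rows:

    def all_rows_as_list(cursor):
        # Old shape: every row is resident in memory before the first write.
        rows = []
        for line in cursor:
            rows.append(list(line))
        return rows

    def all_rows_as_generator(cursor):
        # New shape: rows are produced on demand; peak memory is one row/batch.
        for line in cursor:
            yield line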


@@ -19,6 +19,7 @@
 #
 ##############################################################################
+import itertools
 import time
 import tempfile
 import StringIO
@@ -46,16 +47,14 @@ class AccountUnicodeWriter(object):
         self.stream = f
         self.encoder = codecs.getincrementalencoder(encoding)()
 
-    def writerow(self, row, base64_compress=False):
+    def writerow(self, row):
         #we ensure that we do not try to encode none or bool
-        row = [x or u'' for x in row]
+        row = (x or u'' for x in row)
 
         encoded_row = []
         for c in row:
-            if type(c) == unicode:
+            if isinstance(c, unicode):
                 val = c.encode("utf-8")
-                if base64_compress:
-                    val = base64.encodestring(val)
             else:
                 val = c
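
As an aside on this hunk: `x or u''` maps None/False cells to an empty string, and `isinstance(c, unicode)` also accepts `unicode` subclasses where `type(c) == unicode` would not. A small Python 2 illustration of the same encoding step, outside the writer class:

    def encode_cells(row):
        # Lazily blank out None/False, then UTF-8 encode text cells.
        cells = (x or u'' for x in row)
        encoded = []
        for c in cells:
            if isinstance(c, unicode):
                encoded.append(c.encode("utf-8"))
            else:
                encoded.append(c)
        return encoded

    # encode_cells([u'été', None, 42]) -> ['\xc3\xa9t\xc3\xa9', '', 42]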
@@ -72,9 +71,9 @@ class AccountUnicodeWriter(object):
         # empty queue
         self.queue.truncate(0)
 
-    def writerows(self, rows, base64_compress=False):
+    def writerows(self, rows):
         for row in rows:
-            self.writerow(row, base64_compress=base64_compress)
+            self.writerow(row)
 
 
 class AccountCSVExport(orm.TransientModel):
     _name = 'account.csv.export'
@@ -230,12 +229,19 @@ class AccountCSVExport(orm.TransientModel):
         """
         Here we use TemporaryFile to avoid full filling the OpenERP worker Memory
         We also write the data to the wizard with SQL query as write seams to use
-        too much memory as well
-        Thos improvment permitted to improve the export from a 100k line to 200k lines
-        with default `limit_memory_hard = 805306368` (768MB)
+        too much memory as well.
+        Those improvements permitted to improve the export from a 100k line to 200k lines
+        with default `limit_memory_hard = 805306368` (768MB) with more lines,
+        you might encounter a MemoryError when trying to download the file even
+        if it has been generated.
+        To be able to export bigger volume of data, it is advised to set
+        limit_memory_hard to (2 GB) to generate the file and let
+        OpenERP load it in the wizard when trying to download it.
+        Tested with up to a generation of 700k entry lines
         """
-        #XXX check why it still fail with more than 200k line and when
         this = self.browse(cr, uid, ids)[0]
         rows = self.get_data(cr, uid, ids, "journal_entries", context)
         with tempfile.TemporaryFile() as file_data:
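
For orientation, the pattern this docstring describes, reduced to a standalone sketch: spool the CSV to an anonymous temporary file, then store the base64 payload on the wizard row with a raw UPDATE instead of an ORM write(). The table and column names here are illustrative assumptions, not necessarily the module's:

    import base64
    import tempfile

    def store_csv(cr, wizard_id, rows, make_writer):
        with tempfile.TemporaryFile() as file_data:
            # Rows are written as they are generated; the CSV lives on disk, not in RAM.
            make_writer(file_data).writerows(rows)
            file_data.seek(0)
            # Reading back for base64 still materializes the payload once,
            # which is why downloads can hit limit_memory_hard anyway.
            data = base64.encodestring(file_data.read())
        # Raw SQL avoids the extra buffering of an ORM write().
        cr.execute("UPDATE account_csv_export SET data = %s WHERE id = %s",
                   (data, wizard_id))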
@@ -297,7 +303,7 @@ class AccountCSVExport(orm.TransientModel):
                               company_id,
                               context=None):
         """
-        Return list to generate rows of the CSV file
+        Create a generator of rows of the CSV file
         """
         cr.execute("""
         SELECT
@@ -344,12 +350,15 @@ class AccountCSVExport(orm.TransientModel):
         """,
             {'period_ids': tuple(period_range_ids), 'journal_ids': tuple(journal_ids)}
         )
-        res = cr.fetchall()
-        rows = []
-        for line in res:
-            rows.append(list(line))
-        return rows
+        while 1:
+            # http://initd.org/psycopg/docs/cursor.html#cursor.fetchmany
+            # Set cursor.arraysize to minimize network round trips
+            cr.arraysize=100
+            rows = cr.fetchmany()
+            if not rows:
+                break
+            for row in rows:
+                yield row
 
     def get_data(self, cr, uid, ids,result_type,context=None):
         get_header_func = getattr(self,("_get_header_%s"%(result_type)), None)
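
The while/fetchmany loop added above is the stock psycopg batching idiom; detached from the wizard, a sketch of the same pattern reads:

    def iter_rows(cursor, size=100):
        # fetchmany() returns at most cursor.arraysize rows per call,
        # so only one batch is ever resident in the worker.
        cursor.arraysize = size
        while True:
            batch = cursor.fetchmany()
            if not batch:
                break
            for row in batch:
                yield row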
@@ -370,13 +379,12 @@ class AccountCSVExport(orm.TransientModel):
         else:
             j_obj = self.pool.get("account.journal")
             journal_ids = j_obj.search(cr, uid, [], context=context)
-        rows = []
-        rows.append(get_header_func(cr, uid, ids, context=context))
-        rows.extend(get_rows_func(
-            cr, uid, ids,
-            fiscalyear_id,
-            period_range_ids,
-            journal_ids,
-            company_id,
-            context=context))
+        rows = itertools.chain((get_header_func(cr, uid, ids, context=context),),
+                               get_rows_func(cr, uid, ids,
+                                             fiscalyear_id,
+                                             period_range_ids,
+                                             journal_ids,
+                                             company_id,
+                                             context=context)
+                               )
         return rows
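
One closing note on this last hunk: `itertools.chain` keeps the pipeline lazy end to end, splicing the one-element header tuple in front of the row generator so the writer sees a single iterable and no intermediate list is ever built. A toy usage, with field names invented for the example:

    import itertools

    def fake_lines():
        for i in range(3):          # stands in for the SQL cursor generator
            yield (u'2013-09-13', u'MISC', i, 0.0)

    header = (u'date', u'journal', u'debit', u'credit')
    rows = itertools.chain((header,), fake_lines())
    # A writer's writerows(rows) now emits the header first, then each line.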