这是否意味着表中的列数会显著影响SQLAlchemy的性能?
嗯,这个问题不太好回答,它可能更多地取决于底层的SQL引擎(在这里是MySQL),而不是SQLAlchemy本身——SQLAlchemy只不过是一种用同一套接口与不同数据库引擎交互的方式。
SQLAlchemy是Python SQL工具包和对象关系映射器,为应用程序开发人员提供了SQL的全部功能和灵活性。
它提供了一整套众所周知的企业级持久化模式,专为高效、高性能的数据库访问而设计,并被改编为一种简单且符合Python风格的领域语言。
不过我也可能是错的,你可以尝试用原生SQL对它做基准测试。
我实际跑了一些测试……
import timeit
# Fixture shared by the SQLAlchemy SELECT benchmarks. It builds a SQLite
# database through SQLAlchemy with two tables of 4000 rows each:
#   * `user`  - 200 columns (c0..c99 VARCHAR(1), d0..d99 VARCHAR(2))
#   * `user2` - 2 columns (c0 VARCHAR(100), d0 VARCHAR(200))
# Both tables default to 300 characters of '0' per row, so the benchmarks
# compare column count rather than data volume.
# The loop body must be indented inside the string or timeit cannot compile
# the setup code; `range` is used so the snippet runs on Python 2 and 3.
setup = """
from sqlalchemy import create_engine, MetaData, select, Table, Column
from sqlalchemy.dialects.sqlite import BOOLEAN, SMALLINT, VARCHAR
engine = create_engine('sqlite://', echo = False)
metadata = MetaData()
conn = engine.connect()
columns = []
for i in range(100):
    columns.append(Column('c%d' % i, VARCHAR(1), nullable = False, server_default = '0'))
    columns.append(Column('d%d' % i, VARCHAR(2), nullable = False, server_default = '00'))
user = Table('user', metadata, *columns)
user.create(engine)
conn.execute(user.insert(), [{}] * 4000)
user2 = Table('user2', metadata,
              Column('c0', VARCHAR(100), nullable = False, server_default = '0' * 100),
              Column('d0', VARCHAR(200), nullable = False, server_default = '0' * 200))
user2.create(engine)
conn.execute(user2.insert(), [{}] * 4000)
"""

# Timed statement: compile a SQLAlchemy select over the 200-column table and
# fetch every row.
many_columns = """
s1 = select([user]).compile(engine)
result = conn.execute(s1).fetchall()
"""

# Timed statement: the same select/fetch against the 2-column table.
two_columns = """
s2 = select([user2]).compile(engine)
result = conn.execute(s2).fetchall()
"""

# Timed statements: the same queries sent as raw SQL text through the
# SQLAlchemy connection, bypassing the select() construct.
raw_many_columns = "res = conn.execute('SELECT * FROM user').fetchall()"
raw_two_columns = "res = conn.execute('SELECT * FROM user2').fetchall()"
# Time each statement exactly once. timeit executes `setup` again for every
# timeit() call, so each measurement starts from a freshly built database.
# The elapsed seconds are printed with a label; a bare expression statement
# only shows its value in an interactive session and is lost in a script.
for label, stmt in (
    ("two_columns", two_columns),
    ("raw_two_columns", raw_two_columns),
    ("many_columns", many_columns),
    ("raw_many_columns", raw_many_columns),
):
    print("%s: %r" % (label, timeit.Timer(stmt, setup).timeit(number = 1)))
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.010751008987426758
>>> timeit.Timer(raw_two_columns, setup).timeit(number = 1)
0.0099620819091796875
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.23563408851623535
>>> timeit.Timer(raw_many_columns, setup).timeit(number = 1)
0.21881699562072754
我倒是找到了这篇文章:
http://www.mysqlperformanceblog.com/2009/09/28/how-number-of-columns-affects-performance/
这篇文章挺有意思的,不过他是用MAX来做测试的……
我真的很喜欢SQLAlchemy,所以我决定用Python自带的sqlite3模块来和它做比较
import timeit
# Fixture for the plain-sqlite3 SELECT benchmark. It mirrors the SQLAlchemy
# fixture: an in-memory database with a 200-column `user` table and a
# 2-column `user2` table, 4000 rows each.
# All 100 column pairs come from one template, so d99 gets the same '00'
# default as every other d-column (it was previously declared with '0').
# The inserted rows use ('0', '00') pairs so the stored data matches the
# column defaults that the SQLAlchemy fixture relies on.
setup = """
import sqlite3
conn = sqlite3.connect(':memory:')
c = conn.cursor()
column_defs = ", ".join(
    "c%i VARCHAR(1) DEFAULT '0' NOT NULL, d%i VARCHAR(2) DEFAULT '00' NOT NULL" % (index, index)
    for index in range(100))
c.execute('CREATE TABLE user (%s)' % column_defs)
c.execute("CREATE TABLE user2 (c0 VARCHAR(100) DEFAULT '%s' NOT NULL, d0 VARCHAR(200) DEFAULT '%s' NOT NULL)" % ('0'* 100, '0'*200))
conn.commit()
c.executemany('INSERT INTO user VALUES (%s)' % ('?,' * 199 + '?'), [('0', '00') * 100] * 4000)
c.executemany('INSERT INTO user2 VALUES (?,?)', [('0'*100, '0'*200)] * 4000)
conn.commit()
"""

# Timed statement: read every row of the 200-column table.
# The result is bound to `rows` rather than `all` to avoid shadowing the builtin.
many_columns = """
r = c.execute('SELECT * FROM user')
rows = r.fetchall()
"""

# Timed statement: read every row of the 2-column table.
two_columns = """
r2 = c.execute('SELECT * FROM user2')
rows = r2.fetchall()
"""
# Time each SELECT once; timeit rebuilds the database via `setup` for every
# timeit() call. The timings are printed so they are visible when this runs
# as a script and not only in an interactive session.
for label, stmt in (("many_columns", many_columns), ("two_columns", two_columns)):
    print("%s: %r" % (label, timeit.Timer(stmt, setup).timeit(number = 1)))
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.21009302139282227
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.0083379745483398438
结果得到了相同的结论,所以我确实认为这是数据库实现的问题,而不是SQLAlchemy的问题。
DEFAULT INSERT
import timeit
# Fixture for the SQLAlchemy INSERT benchmark. It creates the same two tables
# as the SELECT benchmark (200-column `user`, 2-column `user2`) but leaves
# them empty, because the inserts themselves are what gets timed.
# The loop body must be indented inside the string or timeit cannot compile
# the setup code; `range` is used so the snippet runs on Python 2 and 3.
setup = """
from sqlalchemy import create_engine, MetaData, select, Table, Column
from sqlalchemy.dialects.sqlite import BOOLEAN, SMALLINT, VARCHAR
engine = create_engine('sqlite://', echo = False)
metadata = MetaData()
conn = engine.connect()
columns = []
for i in range(100):
    columns.append(Column('c%d' % i, VARCHAR(1), nullable = False, server_default = '0'))
    columns.append(Column('d%d' % i, VARCHAR(2), nullable = False, server_default = '00'))
user = Table('user', metadata, *columns)
user.create(engine)
user2 = Table('user2', metadata,
              Column('c0', VARCHAR(100), nullable = False, server_default = '0' * 100),
              Column('d0', VARCHAR(200), nullable = False, server_default = '0' * 200))
user2.create(engine)
"""

# Timed statement: insert 4000 rows into the 200-column table, passing empty
# parameter dicts so no column value is supplied by the caller.
many_columns = """
conn.execute(user.insert(), [{}] * 4000)
"""

# Timed statement: the same 4000-row insert into the 2-column table.
two_columns = """
conn.execute(user2.insert(), [{}] * 4000)
"""
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.017949104309082031
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.047809123992919922
使用sqlite3模块进行同样的测试。
import timeit
# Fixture for the plain-sqlite3 INSERT benchmark: an in-memory database with
# an empty 200-column `user` table and an empty 2-column `user2` table.
# All 100 column pairs come from one template, so d99 gets the same '00'
# default as every other d-column (it was previously declared with '0').
setup = """
import sqlite3
conn = sqlite3.connect(':memory:')
c = conn.cursor()
column_defs = ", ".join(
    "c%i VARCHAR(1) DEFAULT '0' NOT NULL, d%i VARCHAR(2) DEFAULT '00' NOT NULL" % (index, index)
    for index in range(100))
c.execute('CREATE TABLE user (%s)' % column_defs)
c.execute("CREATE TABLE user2 (c0 VARCHAR(100) DEFAULT '%s' NOT NULL, d0 VARCHAR(200) DEFAULT '%s' NOT NULL)" % ('0'* 100, '0'*200))
conn.commit()
"""

# Timed statement: insert 4000 rows of 200 bound values ('0' for each
# c-column, '00' for each d-column) and commit.
many_columns = """
c.executemany('INSERT INTO user VALUES (%s)' % ('?,' * 199 + '?'), [('0', '00') * 100] * 4000)
conn.commit()
"""

# Timed statement: insert 4000 rows of two long values (100 and 200
# characters) and commit.
two_columns = """
c.executemany('INSERT INTO user2 VALUES (?,?)', [('0'*100, '0'*200)] * 4000)
conn.commit()
"""
# Time each INSERT batch once; timeit recreates the empty tables via `setup`
# for every timeit() call. The timings are printed so they are visible when
# this runs as a script and not only in an interactive session.
for label, stmt in (("many_columns", many_columns), ("two_columns", two_columns)):
    print("%s: %r" % (label, timeit.Timer(stmt, setup).timeit(number = 1)))
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.14044189453125
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.014360189437866211
>>>
非常感谢您出色的回答。我其实还问了一个相关的问题[int和二进制的插入速度](http://stackoverflow.com/questions/11388729/the-insert-speed-of-int-and-binary)。我用SQL对相同的表测试了插入速度,发现对于包含更多列的表,插入速度非常快。这不是很奇怪吗? @samy.vilar – heller 2012-07-09 09:04:42
@heller 很有趣。我用默认/空白插入以及填充数据的插入做了一个快速而简单的测试(仍然使用VARCHAR,只是为了确保没有发生什么奇怪的类型转换或检查),结果是two_columns的那个更快,所以我不确定MySQL的情况会怎样,那篇文章说“InnoDB慢很多” – 2012-07-09 09:23:17
你可能会发现,如果你访问每一行中的每一列(例如“[col for row in]”这样的写法),SQLAlchemy的例子会有更多的开销。这正是每列数据处理器起作用的地方——不过对于SQLite,我们只对日期类型使用它。另外,很高兴你在这里使用的是Core!如果使用ORM并加载完整对象,开销会显著得多。 – zzzeek 2012-07-09 13:19:16