博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
如何用python在mysql上创建1亿条数据
阅读量:6431 次
发布时间:2019-06-23

本文共 3191 字,大约阅读时间需要 10 分钟。

  hot3.png

use example;create TABLE `user` (  `id` int(11) NOT NULL,  `last_name` varchar(45) DEFAULT NULL,  `first_name` varchar(45) DEFAULT NULL,  `sex` set('M','F')  DEFAULT NULL,  `age` tinyint(1) DEFAULT NULL,  `phone` varchar(11) DEFAULT NULL,  `address` varchar(45) DEFAULT NULL,  `password` varchar(45) DEFAULT NULL,  `create_time` datetime DEFAULT NULL,  PRIMARY KEY (`id`),  KEY `idx_last_first_name_age` (`last_name`,`first_name`,`age`) USING BTREE,  KEY `idx_phone` (`phone`) USING BTREE,  KEY `idx_create_time` (`create_time`) USING BTREE) ENGINE=InnoDB DEFAULT CHARSET=utf8;
#!/usr/bin/python# -*- coding: UTF-8 -*-import MySQLdbimport MySQLdb.cursorsimport randomimport stringimport time#批量插的次数loop_count = 1000000#每次批量查的数据量batch_size = 100success_count = 0fails_count = 0#数据库的连接conn = MySQLdb.connect(host="127.0.0.1", user="root", passwd="123456", db="example", port=3306, cursorclass = MySQLdb.cursors.SSCursor)chars = 'AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz'digits = '0123456789'def random_generate_string(length):    return string.join(random.sample(chars, length), '')def random_generate_number(length):    if length > len(digits):        digit_list = random.sample(digits, len(digits))        digit_list.append(random.choice(digits))        return string.join(digit_list, '')    return string.join(random.sample(digits, length), '')def random_generate_data(num):    c = [num]    phone_num_seed = 13100000000    def _random_generate_data():        c[0] += 1        return (            c[0],            "last_name_" + str(random.randrange(100000)),            "first_name_" + str(random.randrange(100000)),            random.choice('MF'),            random.randint(1, 120),            phone_num_seed + c[0],            random_generate_string(20),            random_generate_string(10),            time.strftime("%Y-%m-%d %H:%M:%S")        )    return _random_generate_datadef execute_many(insert_sql, batch_data):    global success_count, fails_count    cursor = conn.cursor()    try:        cursor.executemany(insert_sql, batch_data)    except Exception, e:        conn.rollback()        fails_count = fails_count + len(batch_data)        print e        raise    else:        conn.commit()        success_count = success_count + len(batch_data)        print str(success_count) + " commit"    finally:        cursor.close()try:    #user表列的数量    column_count = 9    #插入的SQL    insert_sql = "replace into user(id, last_name, first_name, sex, age, phone, address, password, create_time) values (" + ",".join([ "%s" for x in xrange(column_count)]) + ")"    batch_count = 0    begin_time = time.time()    for x in xrange(loop_count):        batch_count =  x * batch_size        gen_fun = random_generate_data(batch_count)        batch_data = [gen_fun() for x in xrange(batch_size)]        execute_many(insert_sql, batch_data)    end_time = time.time()    total_sec = end_time - begin_time    qps = success_count / total_sec    print "总共生成数据: " + str(success_count)    print "总共耗时(s): " + str(total_sec)    print "QPS: " + str(qps)except Exception, e:    print e    raiseelse:    passfinally:    pass
python generate_user_data.py

 

转载于:https://my.oschina.net/jywm/blog/1603986

你可能感兴趣的文章
Java缓冲流细节
查看>>
IOS中复制对象的用法及深拷贝和浅拷贝详解
查看>>
lfs
查看>>
Eclipse内存不够解决办法
查看>>
关于tbody js取法兼容。
查看>>
[CC]点云密度计算
查看>>
CATransition 动画处理视图切换
查看>>
[转载] 高等应用数学问题的matlab求解——第3章 微积分问题的计算机求解
查看>>
大整数比较大小
查看>>
C++ 指定路径文件夹存在与否查询及文件夹创建
查看>>
八大排序算法的Java实现
查看>>
IDEA+Maven+Tomcat构建项目流程
查看>>
java 线程机制
查看>>
数据是重要的战略资源,数据同样是产品非常重要的组成部分。淘宝对中国最大的贡献,不只是方便了老百姓购物,而是把中国消费者的消费习惯数据慢慢沉淀下来。...
查看>>
Leetcode Find Minimum in Rotated Sorted Array
查看>>
Python接口测试-使用requests模块发送post请求
查看>>
System.currentTimeMillis()计算方式与时间的单位转换
查看>>
Extra:Variable Types
查看>>
js传参时,没有参数传入,默认值的设置
查看>>
ASP.NET温故而知新学习系列之ASP.NET多线程编程—.NET下的多线程编程Thread中委托的使用(六)...
查看>>