Python: SQlite3 - Utilisée une base de donnée en mémoire
Avec Python, il est possible d'utiliser une base de donnée SQlite montée en mémoire vive.
Très pratique dans le cas d'une utilisation occasionnelle et non persistente (analyse d'un fichier csv par exemple).
Exemple avec Python3
Pour l'exemple, j'utilise le fichier des codes postaux français.
#!/usr/bin/env python3
import csv
import sqlite3
import time
# Execution start time
start = time.time()
# Fields list
fieldsList = [
'countryCode', 'postalCode', 'placeName', 'adminName1', 'adminCode1', 'adminName2',
'adminCode2', 'adminName3', 'adminCode3', 'latitude', 'longitude', 'accuracy'
]
# Create database in memory
con = sqlite3.connect(':memory:')
cur = con.cursor()
# Create table in database
cur.execute('create table postalCodes \
(id integer primary key, {0} text, {1} text, {2} text, \
{3} text, {4} text, {5} text, {6} text, {7} text, \
{8} text, {9} real, {10} real, {11} integer)'.format(*fieldsList)
)
# Insert request
sqlInsert = 'insert into postalCodes ({0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}) \
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'.format(*fieldsList)
# Read file and insert values into table 'postalCodes
'
with open('FR.txt', 'r', newline='', encoding='utf-8') as f:
reader = csv.reader(f, delimiter='\t')
for row in reader:
cur.execute(sqlInsert, row)
# Commit
con.commit()
# Execution end time
end = time.time()
# Start of analysis
for row in cur.execute('select count(*) from postalCodes'):
totalLines = row[0]
print("{:n} lines inserted in {:f} sec.".format(totalLines, end - start), end='\n\n')
# Result
51141 lines inserted in 0.493353 sec.
print("All lines with the postal code: '44150'.")
for row in cur.execute('select * from postalCodes where postalCode = ?', ('44150', )):
print(row)
# Result
All lines with the postal code: '44150'.
(27254, 'FR', '44150', 'Saint-Herblon', 'Pays de la Loire', '52', 'Loire-Atlantique', '44', 'Arrondissement d’Ancenis', '444', 47.4079, -1.0974, 5)
(27255, 'FR', '44150', 'Anetz', 'Pays de la Loire', '52', 'Loire-Atlantique', '44', 'Arrondissement d’Ancenis', '444', 47.3806, -1.1058, 5)
(27256, 'FR', '44150', 'Saint-Géréon', 'Pays de la Loire', '52', 'Loire-Atlantique', '44', 'Arrondissement d’Ancenis', '444', 47.3677, -1.2026, 5)
(27257, 'FR', '44150', 'Ancenis', 'Pays de la Loire', '52', 'Loire-Atlantique', '44', 'Arrondissement d’Ancenis', '444', 47.3667, -1.1667, 5)
print()
print("Minimum and maximum postal code for the adminCode2: 'Côtes-d'Armor'.")
for row in cur.execute('select min(postalCode), max(postalCode) from postalCodes where adminName2 = ?', ('Côtes-d\'Armor', )):
print(row)
# Result
Minimum and maximum postal code for the adminCode2: 'Côtes-d'Armor'.
('22000', '22980')
print()
print("The number of placeName by adminName1 and adminName2.")
for row in cur.execute('select adminName1, adminName2, count(placeName) from postalCodes group by adminName1, adminName2'):
print(row)
# Result
The number of placeName by adminName1 and adminName2.
('', '', 1)
('Alsace-Champagne-Ardenne-Lorraine', 'Ardennes', 496)
('Alsace-Champagne-Ardenne-Lorraine', 'Aube', 497)
('Alsace-Champagne-Ardenne-Lorraine', 'Bas-Rhin', 802)
...
('Île-de-France', 'Yvelines', 601)
Super pratique, non !!!
Ajouter un commentaire