import sqlite3
conn = sqlite3.connect('dlab')
conn.close()sql = "SELECT * FROM `user`"
cursor = conn.execute(sql)
for row in cursor.fetchall():
print (row)
(1, 'Mars', 1.5) (2, 'Bruce', 1.3) (3, 'Coffie', 0.8) (4, 'Jun', 0.5) (5, 'Byron', 0.5) (6, 'Bruce', 0.3)
sql = "SELECT `sn`,`name` FROM `user` WHERE `name` LIKE '%B%'"
cursor = conn.execute(sql)
for row in cursor.fetchall():
sn,name = row[0],row[1]
# sn,name = row # 因為row也只有兩格,剛好對應到sn,name,所以可以偷懶寫成這樣
print (sn,name)
2 Bruce 5 Byron 6 Bruce
sql = "SELECT `sn`,`name` FROM `user` WHERE `name` LIKE '%B%'"
cursor = conn.execute(sql)
print (list(cursor.fetchall()))
for row in cursor.fetchall():
print (row)
[(2, 'Bruce'), (5, 'Byron'), (6, 'Bruce')]
sql = "SELECT `sn`,`name` FROM `user` WHERE `name` LIKE '%B%'"
cursor = conn.execute(sql)
for row in cursor.fetchall():
print (row)
print (list(cursor.fetchall()))
(2, 'Bruce') (5, 'Byron') (6, 'Bruce') []
import sqlite3
conn = sqlite3.connect('dlab')
conn.row_factory = sqlite3.Row ###
sql = "SELECT `sn`,`name` FROM `user` WHERE `name` LIKE '%B%'"
cursor = conn.execute(sql)
for row in cursor.fetchall():
print (row['sn'],row['name'],type(row))
conn.close()
2 Bruce <class 'sqlite3.Row'> 5 Byron <class 'sqlite3.Row'> 6 Bruce <class 'sqlite3.Row'>
import sqlite3
conn = sqlite3.connect('dlab')
sql = "SELECT `sn`,`name` FROM `user` WHERE `name` LIKE '%B%'"
cursor = conn.execute(sql)
for row in cursor.fetchall():
print (row[0],row[1],type(row))
conn.close()
2 Bruce <class 'tuple'> 5 Byron <class 'tuple'> 6 Bruce <class 'tuple'>
.commit() !import sqlite3
conn = sqlite3.connect('dlab')
sql = "UPDATE `user` SET `year`=0.6 WHERE `sn`=2"
cursor = conn.execute(sql)
cursor = conn.commit()
sql = "SELECT * FROM `user`"
cursor = conn.execute(sql)
for row in cursor.fetchall():
print (row)
conn.close()
(1, 'Mars', 1.5) (2, 'Bruce', 0.6) (3, 'Coffie', 0.8) (4, 'Jun', 0.5) (5, 'Byron', 0.5) (6, 'Bruce', 0.3)
filein = open("movie_metadata.csv")
data = filein.read().split("\n")[1:] #第一行是欄標題,不需要
# 欄位名稱,如果希望用key,value的方式存取資料,下方串列可以拿來利用!
csv_key_index = ['color','director_name','num_critic_for_reviews','duration','director_facebook_likes','actor_3_facebook_likes','actor_2_name','actor_1_facebook_likes','gross','genres','actor_1_name','movie_title','num_voted_users','cast_total_facebook_likes','actor_3_name','facenumber_in_poster','plot_keywords','movie_imdb_link','num_user_for_reviews','language','country','content_rating','budget','title_year','actor_2_facebook_likes','imdb_score','aspect_ratio','movie_facebook_likes']
for line in data:
line = line.replace("\xa0","")
columns = line.split(",")
columns_list = []
count = 0
now_data = ""
# 檢查有沒有column 因為內容包含,被切斷,需要把他們合併起來
for column in columns:
if "\"" in column: # 有包含,的內容,會以"包起來,所以就是檢查是否有落單的"
count+= 1
if count!=0: # 如果有出現落單的",就一直把資料相加
now_data+= column
if count==0:
columns_list.append(column)
elif count ==2: # 如果發現已經有成對的",就算合併完成,可以丟到columns_list中
now_data = now_data.replace("\"", "")
columns_list.append(now_data)
now_data = ""
count = 0
print (columns_list)
break # 會跳出最近一層的迴圈(while,for),配合 print 可以幫忙debug
filein.close()
['Color', 'James Cameron', '723', '178', '0', '855', 'Joel David Moore', '1000', '760505847', 'Action|Adventure|Fantasy|Sci-Fi', 'CCH Pounder', 'Avatar', '886204', '4834', 'Wes Studi', '0', 'avatar|future|marine|native|paraplegic', 'http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1', '3054', 'English', 'USA', 'PG-13', '237000000', '2009', '936', '7.9', '1.78', '33000']
csv_key_index = ['color','director_name','num_critic_for_reviews','duration','director_facebook_likes','actor_3_facebook_likes','actor_2_name','actor_1_facebook_likes','gross','genres','actor_1_name','movie_title','num_voted_users','cast_total_facebook_likes','actor_3_name','facenumber_in_poster','plot_keywords','movie_imdb_link','num_user_for_reviews','language','country','content_rating','budget','title_year','actor_2_facebook_likes','imdb_score','aspect_ratio','movie_facebook_likes']
csv_dict = {}
index = 0
for value in csv_key_index:
print (index,value)
csv_dict[value]=index
index+=1
print (csv_dict)
0 color
1 director_name
2 num_critic_for_reviews
3 duration
4 director_facebook_likes
5 actor_3_facebook_likes
6 actor_2_name
7 actor_1_facebook_likes
8 gross
9 genres
10 actor_1_name
11 movie_title
12 num_voted_users
13 cast_total_facebook_likes
14 actor_3_name
15 facenumber_in_poster
16 plot_keywords
17 movie_imdb_link
18 num_user_for_reviews
19 language
20 country
21 content_rating
22 budget
23 title_year
24 actor_2_facebook_likes
25 imdb_score
26 aspect_ratio
27 movie_facebook_likes
{'duration': 3, 'num_user_for_reviews': 18, 'actor_2_facebook_likes': 24, 'budget': 22, 'actor_3_facebook_likes': 5, 'genres': 9, 'language': 19, 'movie_title': 11, 'gross': 8, 'title_year': 23, 'num_critic_for_reviews': 2, 'director_name': 1, 'director_facebook_likes': 4, 'cast_total_facebook_likes': 13, 'actor_1_facebook_likes': 7, 'movie_facebook_likes': 27, 'content_rating': 21, 'num_voted_users': 12, 'aspect_ratio': 26, 'plot_keywords': 16, 'movie_imdb_link': 17, 'imdb_score': 25, 'country': 20, 'actor_3_name': 14, 'actor_2_name': 6, 'color': 0, 'actor_1_name': 10, 'facenumber_in_poster': 15}
csv_key_index = ['color','director_name','num_critic_for_reviews','duration','director_facebook_likes','actor_3_facebook_likes','actor_2_name','actor_1_facebook_likes','gross','genres','actor_1_name','movie_title','num_voted_users','cast_total_facebook_likes','actor_3_name','facenumber_in_poster','plot_keywords','movie_imdb_link','num_user_for_reviews','language','country','content_rating','budget','title_year','actor_2_facebook_likes','imdb_score','aspect_ratio','movie_facebook_likes']
csv_dict = {}
for index,value in enumerate(csv_key_index): ###
print (index,value)
csv_dict[value]=index
print (csv_dict)
0 color
1 director_name
2 num_critic_for_reviews
3 duration
4 director_facebook_likes
5 actor_3_facebook_likes
6 actor_2_name
7 actor_1_facebook_likes
8 gross
9 genres
10 actor_1_name
11 movie_title
12 num_voted_users
13 cast_total_facebook_likes
14 actor_3_name
15 facenumber_in_poster
16 plot_keywords
17 movie_imdb_link
18 num_user_for_reviews
19 language
20 country
21 content_rating
22 budget
23 title_year
24 actor_2_facebook_likes
25 imdb_score
26 aspect_ratio
27 movie_facebook_likes
{'duration': 3, 'num_user_for_reviews': 18, 'actor_2_facebook_likes': 24, 'budget': 22, 'actor_3_facebook_likes': 5, 'genres': 9, 'language': 19, 'movie_title': 11, 'gross': 8, 'title_year': 23, 'num_critic_for_reviews': 2, 'director_name': 1, 'director_facebook_likes': 4, 'cast_total_facebook_likes': 13, 'actor_1_facebook_likes': 7, 'movie_facebook_likes': 27, 'content_rating': 21, 'num_voted_users': 12, 'aspect_ratio': 26, 'plot_keywords': 16, 'movie_imdb_link': 17, 'imdb_score': 25, 'country': 20, 'actor_3_name': 14, 'actor_2_name': 6, 'color': 0, 'actor_1_name': 10, 'facenumber_in_poster': 15}
import sqlite3
conn = sqlite3.connect('dlab_imdb')
def get_movie_sn(movie_title,title_year):
sql = 'SELECT `sn` FROM `movie` WHERE `movie_title` LIKE "{}" and title_year = {}'.format(movie_title,title_year)
cursor = conn.execute(sql)
result = cursor.fetchall()
if result:
movie_sn = result[0][0]
return movie_sn
else:
return 0
def get_actor_movie_sn(actor_name,movie_sn):
sql = 'SELECT `sn` FROM `actor_movie` WHERE `actor_name` LIKE "{}" and movie_sn = {}'.format(actor_name,movie_sn)
cursor = conn.execute(sql)
result = cursor.fetchall()
if result:
actor_movie_sn = result[0][0]
return actor_movie_sn
else:
return 0
def get_correct_column_list(columns):
columns_list = []
count = 0
now_data = ""
for column in columns:
if "\"" in column:
count+= 1
if count!=0:
now_data+= column
if count==0:
columns_list.append(column)
elif count ==2:
now_data = now_data.replace("\"", "")
columns_list.append(now_data)
now_data = ""
count = 0
return columns_list
csv_key_index = ['color','director_name','num_critic_for_reviews','duration','director_facebook_likes','actor_3_facebook_likes','actor_2_name','actor_1_facebook_likes','gross','genres','actor_1_name','movie_title','num_voted_users','cast_total_facebook_likes','actor_3_name','facenumber_in_poster','plot_keywords','movie_imdb_link','num_user_for_reviews','language','country','content_rating','budget','title_year','actor_2_facebook_likes','imdb_score','aspect_ratio','movie_facebook_likes']
csv_dict = {}
for index,value in enumerate(csv_key_index):
csv_dict[value]=index
filein = open("movie_metadata.csv")
data = filein.read().split("\n")[1:]
for line in data:
line = line.replace("\xa0","")
columns = line.split(",")
columns_list = get_correct_column_list(columns) ###
movie_title,title_year = columns_list[csv_dict['movie_title']].strip(),columns_list[csv_dict['title_year']]
if not title_year:
title_year = 0
movie_sn = get_movie_sn(movie_title,title_year)
if movie_sn==0:
sql = "INSERT INTO `movie`(`sn`,`movie_title`,`title_year`) VALUES (NULL,'{}',{});".format(movie_title,title_year)
cursor = conn.execute(sql)
cursor = conn.commit()
movie_sn = get_movie_sn(movie_title,title_year)
for i in range(1,4):
actor_key = "actor_{}_name".format(str(i))
actor_name = columns_list[csv_dict[actor_key]]
print (movie_title,actor_name)
actor_movie_sn = get_actor_movie_sn(actor_name,movie_sn)
if actor_movie_sn==0:
sql = "INSERT INTO `actor_movie`(`sn`,`actor_name`,`movie_sn`) VALUES (NULL,'{}',{});".format(actor_name,movie_sn)
cursor = conn.execute(sql)
cursor = conn.commit()
break
filein.close()
Avatar CCH Pounder Avatar Joel David Moore Avatar Wes Studi
print ("movie table============")
sql = "SELECT * FROM `movie`"
cursor = conn.execute(sql)
for row in cursor.fetchall():
print (row)
print ("\n")
print ("actor_movie table============")
sql = "SELECT * FROM `actor_movie`"
cursor = conn.execute(sql)
for row in cursor.fetchall():
print (row)
movie table============ (1, 'Avatar', 2009) actor table============ (1, 'CCH Pounder', 1) (2, 'Joel David Moore', 1) (3, 'Wes Studi', 1)