Revision: 68647
Updated Code
at February 4, 2015 02:18 by brownrl
Updated Code
BEGIN TRANSACTION; ---- -- Drop table for cities ---- DROP TABLE "cities"; ---- -- Table structure for cities ---- CREATE TABLE 'cities' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'name' TEXT); ---- -- Data dump for cities, a total of 3 rows ---- INSERT INTO "cities" ("id","name") VALUES ('1','Paris'); INSERT INTO "cities" ("id","name") VALUES ('2','London'); INSERT INTO "cities" ("id","name") VALUES ('3','New York'); ---- -- Drop table for tags ---- DROP TABLE "tags"; ---- -- Table structure for tags ---- CREATE TABLE 'tags' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'tag' TEXT); ---- -- Data dump for tags, a total of 3 rows ---- INSERT INTO "tags" ("id","tag") VALUES ('1','Europe'); INSERT INTO "tags" ("id","tag") VALUES ('2','North America'); INSERT INTO "tags" ("id","tag") VALUES ('3','River'); ---- -- Drop table for cities_tags ---- DROP TABLE "cities_tags"; ---- -- Table structure for cities_tags ---- CREATE TABLE 'cities_tags' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'city_id' INTEGER NOT NULL, 'tag_id' INTEGER NOT NULL); ---- -- Data dump for cities_tags, a total of 5 rows ---- INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('1','1','1'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('2','1','3'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('3','2','1'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('4','2','3'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('5','3','2'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('6','3','3'); COMMIT; ---- -- Now the fun, when we are looking at city 1 ( Paris ) want to find the -- more similar city to Paris by looking at the tags. -- we use a jacqaard index to compare the sets and ratios -- city_id=1 is Paris and should be change to variable... -- city_id!=1 is also making sure that we don't include paris in the results... ---- select c.name, case when not_in.cnt is null then intersection.cnt * 1.0 /(select count(tag_id) from cities_tags where city_id=1) else intersection.cnt/(not_in.cnt+(select count(tag_id) from cities_tags where city_id=1)) end as jaccard_index from cities c inner join ( select city_id, count(*) as cnt from cities_tags where tag_id in (select tag_id from cities_tags where city_id=1) and city_id!=1 group by city_id ) as intersection on c.id=intersection.city_id left join ( select city_id, count(tag_id) as cnt from cities_tags where city_id!=1 and not tag_id in (select tag_id from cities_tags where city_id=1) group by city_id ) as not_in on c.id=not_in.city_id order by jaccard_index desc
Revision: 68646
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at February 4, 2015 02:11 by brownrl
Initial Code
select c.name, case when not_in.cnt is null then intersection.cnt * 1.0 /(select count(tag_id) from cities_tags where city_id=1) else intersection.cnt/(not_in.cnt+(select count(tag_id) from cities_tags where city_id=1)) end as jaccard_index from cities c inner join ( select city_id, count(*) as cnt from cities_tags where tag_id in (select tag_id from cities_tags where city_id=1) and city_id!=1 group by city_id ) as intersection on c.id=intersection.city_id left join ( select city_id, count(tag_id) as cnt from cities_tags where city_id!=1 and not tag_id in (select tag_id from cities_tags where city_id=1) group by city_id ) as not_in on c.id=not_in.city_id order by jaccard_index desc
Initial URL
http://www.itsgotto.be/cv
Initial Description
This piece of sql is used to find the similarity of one item to other items based on a 3 table tag system, cities, cities_tags, tags. It works in sQlite and MySQL. Use this code to get the idea of how you would make a "item" has "related items" based on "tags" solution.
Initial Title
Jacaard Index, Similarity based on Tag System
Initial Tags
sql
Initial Language
SQL