/ Published in: SQL
This piece of sql is used to find the similarity of one item to other items based on a 3 table tag system, cities, cities_tags, tags. It works in sQlite and MySQL. Use this code to get the idea of how you would make a "item" has "related items" based on "tags" solution.
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
BEGIN TRANSACTION; ---- -- Drop table for cities ---- DROP TABLE "cities"; ---- -- Table structure for cities ---- CREATE TABLE 'cities' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'name' TEXT); ---- -- Data dump for cities, a total of 3 rows ---- INSERT INTO "cities" ("id","name") VALUES ('1','Paris'); INSERT INTO "cities" ("id","name") VALUES ('2','London'); INSERT INTO "cities" ("id","name") VALUES ('3','New York'); ---- -- Drop table for tags ---- DROP TABLE "tags"; ---- -- Table structure for tags ---- CREATE TABLE 'tags' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'tag' TEXT); ---- -- Data dump for tags, a total of 3 rows ---- INSERT INTO "tags" ("id","tag") VALUES ('1','Europe'); INSERT INTO "tags" ("id","tag") VALUES ('2','North America'); INSERT INTO "tags" ("id","tag") VALUES ('3','River'); ---- -- Drop table for cities_tags ---- DROP TABLE "cities_tags"; ---- -- Table structure for cities_tags ---- CREATE TABLE 'cities_tags' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'city_id' INTEGER NOT NULL, 'tag_id' INTEGER NOT NULL); ---- -- Data dump for cities_tags, a total of 5 rows ---- INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('1','1','1'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('2','1','3'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('3','2','1'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('4','2','3'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('5','3','2'); INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('6','3','3'); COMMIT; ---- -- Now the fun, when we are looking at city 1 ( Paris ) want to find the -- more similar city to Paris by looking at the tags. -- we use a jacqaard index to compare the sets and ratios -- city_id=1 is Paris and should be change to variable... -- city_id!=1 is also making sure that we don't include paris in the results... ---- SELECT c.name, CASE WHEN not_in.cnt IS NULL THEN INTERSECTION.cnt * 1.0 /(SELECT COUNT(tag_id) FROM cities_tags WHERE city_id=1) ELSE INTERSECTION.cnt/(not_in.cnt+(SELECT COUNT(tag_id) FROM cities_tags WHERE city_id=1)) END AS jaccard_index FROM cities c INNER JOIN ( SELECT city_id, COUNT(*) AS cnt FROM cities_tags WHERE tag_id IN (SELECT tag_id FROM cities_tags WHERE city_id=1) AND city_id!=1 GROUP BY city_id ) AS INTERSECTION ON c.id=INTERSECTION.city_id LEFT JOIN ( SELECT city_id, COUNT(tag_id) AS cnt FROM cities_tags WHERE city_id!=1 AND NOT tag_id IN (SELECT tag_id FROM cities_tags WHERE city_id=1) GROUP BY city_id ) AS not_in ON c.id=not_in.city_id ORDER BY jaccard_index DESC
URL: http://www.itsgotto.be/cv