Return to Snippet

Revision: 68647
at February 4, 2015 02:18 by brownrl


Updated Code
BEGIN TRANSACTION;

----
-- Drop table for cities
----
DROP TABLE "cities";

----
-- Table structure for cities
----
CREATE TABLE 'cities' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'name' TEXT);

----
-- Data dump for cities, a total of 3 rows
----
INSERT INTO "cities" ("id","name") VALUES ('1','Paris');
INSERT INTO "cities" ("id","name") VALUES ('2','London');
INSERT INTO "cities" ("id","name") VALUES ('3','New York');

----
-- Drop table for tags
----
DROP TABLE "tags";

----
-- Table structure for tags
----
CREATE TABLE 'tags' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'tag' TEXT);

----
-- Data dump for tags, a total of 3 rows
----
INSERT INTO "tags" ("id","tag") VALUES ('1','Europe');
INSERT INTO "tags" ("id","tag") VALUES ('2','North America');
INSERT INTO "tags" ("id","tag") VALUES ('3','River');

----
-- Drop table for cities_tags
----
DROP TABLE "cities_tags";

----
-- Table structure for cities_tags
----
CREATE TABLE 'cities_tags' ('id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 'city_id' INTEGER NOT NULL, 'tag_id' INTEGER NOT NULL);

----
-- Data dump for cities_tags, a total of 5 rows
----
INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('1','1','1');
INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('2','1','3');
INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('3','2','1');
INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('4','2','3');
INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('5','3','2');
INSERT INTO "cities_tags" ("id","city_id","tag_id") VALUES ('6','3','3');
COMMIT;


----
-- Now the fun, when we are looking at city 1 ( Paris ) want to find the
-- more similar city to Paris by looking at the tags.
-- we use a jacqaard index to compare the sets and ratios
-- city_id=1 is Paris and should be change to variable...
-- city_id!=1 is also making sure that we don't include paris in the results...
----



select c.name, 
  case 
    when not_in.cnt is null 
  then 
    intersection.cnt * 1.0 /(select count(tag_id) from cities_tags where city_id=1) 
  else 
    intersection.cnt/(not_in.cnt+(select count(tag_id) from cities_tags where city_id=1)) 
  end as jaccard_index
from cities c 
inner join 
  (
    select city_id, count(*) as cnt 
    from cities_tags 
    where tag_id in (select tag_id from cities_tags where city_id=1) 
    and city_id!=1
    group by city_id
  ) as intersection
on c.id=intersection.city_id
left join
  (
    select city_id, count(tag_id) as cnt
    from cities_tags
    where city_id!=1
    and not tag_id in (select tag_id from cities_tags where city_id=1)
    group by city_id
  ) as not_in
on c.id=not_in.city_id
order by jaccard_index desc

Revision: 68646
at February 4, 2015 02:11 by brownrl


Initial Code
select c.name, 
  case 
    when not_in.cnt is null 
  then 
    intersection.cnt * 1.0 /(select count(tag_id) from cities_tags where city_id=1) 
  else 
    intersection.cnt/(not_in.cnt+(select count(tag_id) from cities_tags where city_id=1)) 
  end as jaccard_index
from cities c 
inner join 
  (
    select city_id, count(*) as cnt 
    from cities_tags 
    where tag_id in (select tag_id from cities_tags where city_id=1) 
    and city_id!=1
    group by city_id
  ) as intersection
on c.id=intersection.city_id
left join
  (
    select city_id, count(tag_id) as cnt
    from cities_tags
    where city_id!=1
    and not tag_id in (select tag_id from cities_tags where city_id=1)
    group by city_id
  ) as not_in
on c.id=not_in.city_id
order by jaccard_index desc

Initial URL
http://www.itsgotto.be/cv

Initial Description
This piece of sql is used to find the similarity of one item to other items based on a 3 table tag system, cities, cities_tags, tags. It works in sQlite and MySQL. Use this code to get the idea of how you would make a "item" has "related items" based on "tags" solution.

Initial Title
Jacaard Index, Similarity based on Tag System

Initial Tags
sql

Initial Language
SQL