SQLite Forum

tree in Nested Set Model, how can I calculate sum of non-leaf node?
Login

tree in Nested Set Model, how can I calculate sum of non-leaf node?

(1) By anonymous on 2021-01-15 09:25:35 [link] [source]

I followed the instruction to convert an adjaceny list to Nested Set according to this tutorial.

As a result, I got such a Nested Set:

(https://i.stack.imgur.com/uUcBW.png)

My tree is to simulate a file system. folder column 0 means file and 1 means folder; only file node have size value.

How could I out put every folder's total size in a query? I want to do this to get every sum of the folder nodes, to show them.

for example:

ID    ParentID    Points
1     0           null
2     1           20
3     0           30
4     1           null
5     4           50
6     4           60

Output should be(If I query ID=0):    
ID    Points
1     130(20+110)    
3     30
    
Output should be(If I query ID=1):
ID    Points
2     20
4     110(50+60)

My purpose is to show the list in the listview, and the performance is important because the operation will be intensive. I don't care memory occupation and my limit of the database is about, let it be, less than 1 million nodes totally under the root node.

What I used to do is to serialize the whole tree, and get the size recursively, the performance is very high, less than 2 seconds(totally 150,000 nodes totally below the root tree), from a tree to view on Listview:
(https://i.stack.imgur.com/4v51x.png)

(I also post on [stackoverflow](https://stackoverflow.com/questions/65725514/tree-in-nested-set-model-how-can-i-calculate-sum-of-non-leaf-nodesqlite))

(2) By Domingo (mingodad) on 2021-01-15 10:31:40 in reply to 1 [source]

I'm just working with storing a file system information on a database to reorganize/cleanup it and did the following (just in case it can help others):

1 - Got a list of all files (in linux) with "ls -AlRGg --full-time > all-files.txt".

2- Then parsed and inserted then in a sqlite database with the schema shown at the end.

3- For quering purposes I also created this index after the database was populated:
====
CREATE INDEX "files_size_idx" ON "files"(size)
====

4 - Create some list of interesting file extensions using this query:
====
--create view if not exists folders_by_type_size_view as
select
	/*hp.id as fp7_id,*/ hp.name as fp7,
	/*gp.id as fp6_id,*/ gp.name as fp6,
	/*fp.id as fp5_id,*/ fp.name as fp5,
	/*ep.id as fp4_id,*/ ep.name as fp4,
	/*dp.id as fp3_id,*/ dp.name as fp3,
	/*cp.id as fp2_id,*/ cp.name as fp2,
	/*bp.id as fp1_id,*/ bp.name as fp1,
	a.* from (
select folder_id as parent_id, folder, sum(size) as total_size from files_list_view where name like '%.h' group by folder_id order by 3 desc limit 1000
) as a
left join folders as b on a.parent_id=b.id
left join folders_list_view as bp on b.parent_id=bp.id
	left join folders_list_view as cp on bp.parent_id=cp.id
		left join folders_list_view as dp on cp.parent_id=dp.id
			left join folders_list_view as ep on dp.parent_id=ep.id
				left join folders_list_view as fp on ep.parent_id=fp.id
					left join folders_list_view as gp on fp.parent_id=gp.id
						left join folders_list_view as hp on gp.parent_id=hp.id;
====

Here is the result of querying stats_view:
====
tbl|cnt|total_size
files|16433449|1931486899532
folders|2522473|10573008896
====

And here is the script I used to parse and populate the database (using https://github.com/mingodad/squilu):

====
// generated with: ls -AlRGg --full-time
auto ls_fname = "all-files.txt";
//if(!existsfile(ls_fname)) os.system("ls -AlRGg --full-time > " + ls_fname);
auto db = SQLite3(ls_fname + ".db");
db.exec_dml([==[
create table if not exists names(
	id integer primary key not null,
	name varchar not null unique
);
delete from names;

create table if not exists folders(
	id integer primary key not null,
	parent_id integer check(id != parent_id) references folders(id),
	name_id integer not null references names(id),
	size integer not null,
	cdate datetime not null,
	unique(parent_id, name_id)
);
delete from folders;

create table if not exists files(
	id integer primary key not null,
	folder_id integer not null references folders(id),
	name_id integer not null references names(id),
	size integer not null,
	cdate datetime not null,
	hash varchar,
	unique(folder_id, name_id)
);
delete from files;

CREATE VIEW if not exists folders_list_view AS
SELECT
	a.id,
	b.name,
	a.parent_id, --c.id,
	a.name_id --b.id
FROM folders AS a
LEFT JOIN names AS b ON a.name_id = b.id;

CREATE VIEW if not exists files_list_view AS
SELECT
	a.id,
	d.name as folder,
	b.name,
	a.size,
	a.cdate,
	a.folder_id, --c.id,
	a.name_id --b.id,
FROM files AS a
LEFT JOIN names AS b ON a.name_id = b.id
LEFT JOIN folders AS c ON a.folder_id = c.id
LEFT JOIN names AS d ON c.name_id = d.id;

create view if not exists stats_view as
select 'folders' as tbl, count(*) cnt, sum(size) total_size from folders
UNION
select 'files' as tbl, count(*) cnt, sum(size) total_size from files;

]==]);

auto stmt_name_insert = db.prepare("insert into names(name) values(?);");
auto stmt_folder_insert = db.prepare("insert into folders(parent_id, name_id, size, cdate) values(?, (select id from names where name=?), ?, ?);");
auto stmt_file_insert = db.prepare("insert into files(folder_id, name_id, size, cdate) values(?, (select id from names where name=?), ?, ?);");

auto stmt_folder_id = db.prepare("select id from folders where parent_id=? and name_id=(select id from names where name=?);");

const root_folder_name = ".";

auto fd = file(ls_fname, "r");
auto line, folder_id, rc, name, fcdate, fsize, record, file_count = 0, folder_count = 0, folder_total_size = 0, file_total_size = 0, count = 0;

db.exec_dml("begin;");

//insert the root folder
stmt_name_insert.bind_exec(root_folder_name);
print(stmt_folder_insert.bind_exec(NULL, root_folder_name, 0, "0000-00-00"));
folder_id = 1;

while((line = fd.read_line()))
{
	//if(++count > 10) break;
	if(line.len() == 0) continue;
	auto line_type = line[0];
	switch(line_type)
	{
		case 'd': //folder
		case '-': //file
			//record = line.split(' ');
			//
			//*** The last separator should be only one space, because a filename can start with spaces
			record = line.match("(%S+)%s+(%S+)%s+(%S+)%s+(%S+)%s+(%S+)%s+(%S+) (.+)");
			//
			//print(record.len(), line);
			//print(record.len(), record[6]);
			name = record[6];
			fsize =  record[2].tointeger();
			fcdate = format("%s %s", record[3], record[4].slice(0, 8));
			stmt_name_insert.bind_exec(name);
			if(line_type == '-') {
				++file_count;
				file_total_size += fsize;
				rc = stmt_file_insert.bind_exec(folder_id, name, fsize, fcdate);
			} else {
				++folder_count;
				folder_total_size += fsize;
				rc = stmt_folder_insert.bind_exec(folder_id, name, fsize, fcdate);
			}
			if(rc != db.SQLITE_DONE) print("++FAILED", rc, folder_id, name, fsize, fcdate, line);
			break;
		case '.': //folder header
			//print(0, line);
			record = line.split('/');
			auto last_idx = record.len() -1;
			foreach(idx, name in record)
			{
				if(idx > 0)  {
					//file/folder names can end with ':', we are only cutting in the last name
					if(idx == last_idx && name[name.len()-1] == ':') name = name.slice(0, -1);
					//stmt_name_insert.bind_exec(name);
					//stmt_folder_insert.bind_exec(folder_id, name);
					folder_id = stmt_folder_id.bind_exec_get_one(folder_id, name);
					if(type(folder_id) != "integer") print("**FAILED", folder_id, name, line);
					//folder_id = db.changes() ? db.last_row_id() : stmt_folder_id.bind_exec_get_one(folder_id, name);
				}
				else folder_id = 1;
			}
			break;
		case 'l': //symbolic link
			break;
	}
}
db.exec_dml("commit;");
print(folder_count, folder_total_size, file_count, file_total_size);
fd.close();
db.close();

====

Cheers !