# blob: 3ded775ccf2ed5f185c65ffc6dfb0da8ba0e856f [file] [log] [blame]
# Script to attempt to clean up our owncloud database (b/32) after The Postgres
# Fuckup (b/30).
#
# Think of it as a one-shot fsck, documented in the form of the code that q3k@
# used to recover from this kerfuffle.
#
# SECURITY: It's full of manual SQL query crafting without parametrization.
# Don't attempt to use it for anything else other than this one-shot usecase.
#
# You will need to tunnel to the postgreses running on Boston:
# $ ssh \
# -L15432:127.0.0.1:5432 \
# -L15433:127.0.0.1:5433 \
# hackerspace.pl
from datetime import datetime, timezone
import os

import psycopg2
# Bounds of the incident window; compared below against oc_share.stime values.
incident_start = 1611529200  # when pg12 started to run
incident_end = 1611788400  # when we rolled back to pg9

# Default to "" so that an unset variable reaches the explanatory error below
# instead of dying with AttributeError on None.strip().
OWNCLOUD_PASSWORD = os.environ.get("OWNCLOUD_PASSWORD", "").strip()
if not OWNCLOUD_PASSWORD:
    # Get it from boston, /var/www/owncloud/config/config.php.
    raise Exception("OWNCLOUD_PASSWORD must be set to owncloud postgres password")

# Both connections go through the local ends of the SSH tunnel described in
# the header. Judging by the names, 15432 is the pg9 server and 15433 the pg12
# one -- NOTE(review): confirm against the tunnel setup.
conn9 = psycopg2.connect(host="localhost", port=15432, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
conn12 = psycopg2.connect(host="localhost", port=15433, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
def idset(conn, table, keyname="id"):
    """Return a set of IDs from a given table, one per row.

    Args:
        conn: open database connection providing ``cursor()``.
        table: table name without the ``oc_`` prefix.
        keyname: name of the column to collect (defaults to ``id``).

    Returns:
        A set containing the value of ``keyname`` for every row of the table.

    ``table`` and ``keyname`` are pasted into the SQL text verbatim, so they
    must be trusted, hard-coded identifiers.
    """
    cur = conn.cursor()
    try:
        cur.execute(f"SELECT {keyname} FROM oc_{table}")
        return {row[0] for row in cur.fetchall()}
    finally:
        # Release the cursor even when the query fails.
        cur.close()
def valset(conn, table, keys):
    """Return a set of concatenated values for the given keys in a table, one per row.

    Args:
        conn: open database connection providing ``cursor()``.
        table: table name without the ``oc_`` prefix.
        keys: column names to select, in the order they should be joined.

    Returns:
        A set of strings; each string is the ``str()`` of every selected
        column of one row, joined with ``';;;'``. Callers split on the same
        separator, so a value that itself contains ``';;;'`` cannot be
        recovered unambiguously.

    ``table`` and ``keys`` are pasted into the SQL text verbatim, so they must
    be trusted, hard-coded identifiers.
    """
    keynames = ", ".join(keys)
    cur = conn.cursor()
    try:
        cur.execute(f"SELECT {keynames} FROM oc_{table}")
        return {';;;'.join(str(elem) for elem in row) for row in cur.fetchall()}
    finally:
        # Release the cursor even when the query fails.
        cur.close()
# Check accounts difference.
#
# RESULT: Thankfully, no accounts have been accidentally rolled back.
# Collect the account UIDs known to each server (pg12 first, then pg9).
accounts12, accounts9 = (
    idset(server, "accounts", keyname="uid") for server in (conn12, conn9)
)
accounts_missing_in_9 = accounts12 - accounts9
print("Accounts missing in 9:", accounts_missing_in_9)
# Stop right here if pg12 knows an account that pg9 does not.
assert not accounts_missing_in_9
def account_by_uid(conn, uid):
    """Return SSO UID for a given Owncloud UID.

    Looks up the ``ldap_dn`` mapped to ``uid`` in ``oc_ldap_user_mapping`` and
    returns the value of its first comma-separated component, which must be of
    the form ``uid=<name>``.

    Args:
        conn: open database connection providing ``cursor()``.
        uid: Owncloud user name (``owncloud_name`` column).

    Returns:
        The text following ``uid=`` in the first component of the DN.

    Raises:
        LookupError: no mapping row exists for ``uid``.
        AssertionError: the DN does not start with ``uid=``.
    """
    cur = conn.cursor()
    try:
        # Bind the value instead of pasting it into the SQL text, so a name
        # containing a quote cannot break (or alter) the query.
        cur.execute(
            "SELECT ldap_dn FROM oc_ldap_user_mapping WHERE owncloud_name = %s",
            (uid,),
        )
        row = cur.fetchone()
    finally:
        # Release the cursor even when the query fails.
        cur.close()
    if row is None:
        raise LookupError(f"no LDAP mapping for owncloud user {uid!r}")
    dn, = row
    part = dn.split(',')[0]
    assert part.startswith('uid='), f"unexpected LDAP DN {dn!r}"
    return part[len('uid='):]
def storage_owner_by_id(conn, id_):
    """Return SSO UID for a given storage numerical ID.

    Reads the storage's string ``id`` from ``oc_storages`` and maps it to an
    owner:

    * ``object::store:amazon::nextcloud`` -> the literal ``"S3"``;
    * ``object::user:gallery`` -> the literal ``"GALLERY"``;
    * ``object::user:<name>`` -> ``account_by_uid(conn, <name>)``.

    Args:
        conn: open database connection providing ``cursor()``.
        id_: value of the ``numeric_id`` column to look up.

    Raises:
        LookupError: no storage row exists for ``id_``.
        AssertionError: the storage id has none of the shapes listed above.
    """
    cur = conn.cursor()
    try:
        # Bind the value instead of pasting it into the SQL text.
        cur.execute("SELECT id FROM oc_storages WHERE numeric_id = %s", (id_,))
        row = cur.fetchone()
    finally:
        # Release the cursor even when the query fails.
        cur.close()
    if row is None:
        raise LookupError(f"no storage with numeric_id {id_!r}")
    oid, = row

    if oid == 'object::store:amazon::nextcloud':
        return "S3"

    user_prefix = 'object::user:'
    assert oid.startswith(user_prefix), f"unexpected storage id {oid!r}"
    userid = oid[len(user_prefix):]
    assert len(userid) > 0
    if userid == "gallery":
        return "GALLERY"
    return account_by_uid(conn, userid)
# Check shares table. This table contains the intent of sharing some file with someone else.
#
# RESULT: we only have things that have been removed after rollback to PG9,
# nothing was created in PG12 and lost.
shareids12 = idset(conn12, "share")
shareids9 = idset(conn9, "share")
print("Shares missing in 9:", len(shareids12 - shareids9))

cur12 = conn12.cursor()
try:
    # Sorted so that repeated runs report (and fail on) shares in the same order.
    for id_ in sorted(shareids12 - shareids9):
        cur12.execute(
            "SELECT uid_owner, file_target, stime, share_with FROM oc_share WHERE id = %s",
            (id_,),
        )
        uid_owner, file_target, stime, _share_with = cur12.fetchone()
        account = account_by_uid(conn12, uid_owner)
        # Aware UTC conversion; prints the same text as the deprecated
        # datetime.utcfromtimestamp() did.
        stime_human = datetime.fromtimestamp(stime, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
        print(f"Missing share {id_} {file_target} owned by {account}..")
        # A share stamped outside the incident window cannot have been created
        # on pg12 during the incident, so its absence from pg9 is not data loss.
        if stime < incident_start or stime > incident_end:
            print(f" Skipping, created at {stime_human}")
            continue
        # Stamped inside the incident window: needs a human decision.
        raise Exception("Unhandled.")
finally:
    # Release the cursor even when the loop aborts with an exception.
    cur12.close()
# Check mounts table. This contains root file storages for each user, but also
# incoming shares 'mounted' into a user's account.
# From what I can tell, storage_id/root_id are the source path that's being
# mounted (root_id being the fileid inside an oc_filecache, and storage_id
# being the storage in which that file is kept), while user_id/mount_point are
# the mount destination (ie. path into which this is mounted for a user's
# view).
#
# RESULT: we only have share-mounts missing for a handful of users. We choose
# to ignore it, as we assume next time these users log in they will get the
# mounts again.
# TODO(q3k): verify this
mount_keys = ["storage_id", "root_id", "user_id", "mount_point"]
mounts12 = valset(conn12, "mounts", mount_keys)
mounts9 = valset(conn9, "mounts", mount_keys)
print("Mounts missing in 9:", len(mounts12 - mounts9))

# Mounts that show up normally whenever you log into owncloud, as they are the
# result of shares:
mount_names_ok = {
    "2020-03-26_covid_templar",
    "camera",
    "Public Shaming",
    "przylbice.md",
    "Test.txt",
    "covid",
}
# Mounts that used to come from a share that existed, but has since been
# deleted in PG9.
mount_names_ok.update({"Covid-instrukcje", "Chaos_modele_covid", "Covid_proces_presspack"})

# Every valset() entry is the four columns joined with ';;;'. Split each one
# back into a 4-tuple, then order the tuples by user_id.
mounts_sorted = [
    (storage_id, root_id, user_id, mount_point)
    for storage_id, root_id, user_id, mount_point in (
        m.split(';;;') for m in mounts12 - mounts9
    )
]
mounts_sorted.sort(key=lambda el: el[2])

for storage_id, root_id, user_id, mount_point in mounts_sorted:
    user_prefix = "/" + user_id + "/"
    assert mount_point.startswith(user_prefix)
    # Drop the leading "/<user_id>" but keep the slash that follows it.
    mount_point = mount_point[len(user_id)+1:]

    account = account_by_uid(conn12, user_id)
    print(f"Missing mount {mount_point}, storage ID {storage_id}, owned by {account}..")
    storage_owner = storage_owner_by_id(conn12, storage_id)
    print(f" Storage owner: {storage_owner}")

    # A known-okay mount looks like "/files/<name>/" with <name> whitelisted.
    parts = mount_point.split('/')
    known_ok = (
        len(parts) == 4
        and (parts[0], parts[1], parts[3]) == ('', 'files', '')
        and parts[2] in mount_names_ok
    )
    if not known_ok:
        raise Exception("Unhandled")
    print(" Skipping, known okay")