Integrate with Google Cloud API in Python

google-cloud is a collection of idiomatic Python client libraries for Google Cloud Platform services. There is also an older Python library officially supported by Google, google-api-python-client, which is in maintenance mode.

ref:
https://github.com/googleapis/google-cloud-python
https://github.com/googleapis/google-api-python-client

Installation

$ pip install google-cloud

# you could also install only specific components
$ pip install google-cloud-storage

ref:
https://pypi.org/search/?q=google+cloud

Google Cloud Storage

It is worth noting that initializing storage.Client() is a blocking call.
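
Since the call blocks while credentials are loaded, a common mitigation is to create the client once and reuse it, rather than constructing one per request. A minimal sketch:

from google.cloud import storage

# storage.Client() blocks while it loads credentials, so construct it once
# at import time and reuse the instance in request handlers
storage_client = storage.Client()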

ref:
https://googleapis.github.io/google-cloud-python/latest/storage/buckets.html
https://cloud.google.com/storage/docs/reference/libraries

Upload From String

from google.cloud import storage
from google.cloud.storage.bucket import Bucket
from google.cloud.storage.blob import Blob

def upload_from_string(bucket_id, content, filename, content_type):
    client = storage.Client()
    bucket = Bucket(client, bucket_id)
    blob = Blob(filename, bucket)
    blob.upload_from_string(content, content_type=content_type)
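
Usage might look like this (the bucket and file names are hypothetical):

upload_from_string(
    bucket_id='asia.uploads.example.com',
    content='{"status": "ok"}',
    filename='healthcheck.json',
    content_type='application/json',
)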

Upload From A URL

from google.cloud import storage
import requests

def upload_from_url(bucket_id, filename, url):
    client = storage.Client()
    session = requests.Session()
    with session.get(url, stream=True) as response:
        bucket = client.get_bucket(bucket_id)
        blob = bucket.blob(filename)
        blob.upload_from_file(response.raw, content_type=response.headers.get('Content-Type'))
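
And a hypothetical invocation:

upload_from_url(
    bucket_id='asia.uploads.example.com',
    filename='images/cat.jpg',
    url='https://example.com/cat.jpg',
)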

Update A File's Metadata

from google.cloud import storage

def update_metadata(bucket_id, filepath, new_metadata):
    client = storage.Client()
    bucket = client.get_bucket(bucket_id)
    blob = bucket.get_blob(filepath)
    blob.metadata = {**blob.metadata, **new_metadata} if blob.metadata else new_metadata
    blob.patch()

new_metadata = {
    'Link': '<https://api.example.com/users/57c16f5bb811055b66d8ef46>; rel="user"',
}

ref:
https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1185

Copy A File

from google.cloud import storage

def copy_file(source_bucket, source_name, destination_bucket, destination_name):
    storage_client = storage.Client()
    source_bucket = storage_client.get_bucket(source_bucket)
    source_file = source_bucket.blob(source_name)
    destination_bucket = storage_client.get_bucket(destination_bucket)
    destination_file = source_bucket.copy_blob(source_file, destination_bucket, destination_name)
    return destination_file

file_ext_mapping = {
    'image/jpeg': 'jpg',
    'video/mp4': 'mp4',
}
file_ext = file_ext_mapping[original_message.media.mimetype]
source_name = f'messages/{original_message.id}.{file_ext}'
destination_name = f'messages/{new_message.id}.{file_ext}'

copy_file(
    source_bucket='asia.uploads.example.com',
    source_name=source_name,
    destination_bucket='asia.uploads.example.com',
    destination_name=destination_name,
)

ref:
https://cloud.google.com/storage/docs/json_api/v1/objects/copy
https://cloud.google.com/storage/docs/renaming-copying-moving-objects#storage-copy-object-python

Copy A Folder With Batch Operations

from google.cloud import storage

def copy_files(source_bucket_name, source_name_prefix, destination_bucket_name, fix_destination_name_func=None):
    storage_client = storage.Client()
    source_bucket = storage_client.get_bucket(source_bucket_name)
    destination_bucket = storage_client.get_bucket(destination_bucket_name)
    blobs = source_bucket.list_blobs(prefix=source_name_prefix)

    # YOU CANNOT DO THIS
    # blobs is an HTTP iterator
    # blobs.num_results always returns 0
    # if not blobs.num_results:
    #     raise ValueError(f'No objects matched: gs://{source_bucket.name}/{source_name_prefix}')

    with storage_client.batch():
        for source_blob in blobs:
            destination_name = fix_destination_name_func(source_blob.name) if callable(fix_destination_name_func) else source_blob.name
            source_bucket.copy_blob(source_blob, destination_bucket, destination_name)
    return True

source_bucket_name = 'asia.uploads.example.com'
destination_bucket_name = 'asia.contents.example.com'
source_name_prefix = 'media/123'

copy_files(
    source_bucket_name=source_bucket_name,
    destination_bucket_name=destination_bucket_name,
    source_name_prefix=source_name_prefix,
    fix_destination_name_func=lambda source_name: source_name.replace(source_name_prefix, 'forum-posts'),
)

This is equivalent to:

$ gsutil cp -r "gs://asia.uploads.example.com/media/123/*" "gs://asia.contents.example.com/"

ref:
https://cloud.google.com/storage/docs/listing-objects

batch() does not guarantee the order of execution, so do not mix dependent calls in the same batch. For instance, a batch should not contain "copy a.txt" followed by "delete a.txt".
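
For instance, to "move" a folder you could run the copies in one batch and the deletes in a second batch, so a delete never races ahead of its copy. A minimal sketch under that assumption:

from google.cloud import storage

def move_files(source_bucket_name, destination_bucket_name, prefix):
    client = storage.Client()
    source_bucket = client.get_bucket(source_bucket_name)
    destination_bucket = client.get_bucket(destination_bucket_name)
    blobs = list(source_bucket.list_blobs(prefix=prefix))

    # first batch: copies only
    with client.batch():
        for blob in blobs:
            source_bucket.copy_blob(blob, destination_bucket, blob.name)

    # second batch: deletes only, issued after the copy batch has been sent
    with client.batch():
        for blob in blobs:
            source_bucket.delete_blob(blob.name)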

ref:
https://googlecloudplatform.github.io/google-cloud-python/latest/storage/batch.html

Upload A File Directly To A Bucket

We first need to generate a signed upload URL, and then we can upload the file to the URL.

import base64
import datetime
import time

from oauth2client.client import GoogleCredentials
import yarl

credentials = GoogleCredentials.get_application_default()

def signurl(method, url, content_type=None, expires_at=None, md5sum=None, meta=None):
    method, is_resumable = method.upper(), False
    if method in ['RESUMABLE']:
        method, is_resumable = 'POST', True
    path = yarl.URL(url).path

    def signature():
        def _signature_parts():
            def _meta():
                for key, value in (meta or {}).items():
                    yield 'x-goog-meta-{key}:{value}'.format(key=key, value=value)
                if is_resumable:
                    yield 'x-goog-resumable:start'

            yield method
            yield md5sum or ''
            # we need to use curl -H 'content-type:' to upload if we sign an empty content-type
            yield content_type or 'application/octet-stream'
            yield str(int(time.mktime(expires_at.timetuple()))) if expires_at else ''
            yield from sorted(_meta())
            yield path

        _, signature = credentials.sign_blob('\n'.join(_signature_parts()))
        return base64.b64encode(signature).decode('utf-8')

    def params():
        yield 'GoogleAccessId', credentials.service_account_email
        if expires_at:
            yield 'Expires', int(time.mktime(expires_at.timetuple()))
        yield 'Signature', signature()

    return str(yarl.URL(url).with_query(**dict(params())))

signurl(
    method='RESUMABLE',
    url='https://storage.googleapis.com/asia.uploads.example.com/media/your-filename.ext',
    expires_at=datetime.datetime.utcnow() + datetime.timedelta(hours=24),
)

$ curl -v -X 'POST' \
-H 'content-type: application/octet-stream' \
-H 'x-goog-resumable:start' \
-d '' 'THE_SIGNED_UPLOAD_URL'

$ curl -v -X PUT \
--upload-file whatever.mp4 \
THE_URL_FROM_LOCATION_HEADER_OF_THE_ABOVE_RESPONSE
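
The same two-step flow sketched in Python with requests instead of curl; signed_url is the string returned by signurl() above, and the file name is hypothetical:

import requests

signed_url = signurl(
    method='RESUMABLE',
    url='https://storage.googleapis.com/asia.uploads.example.com/media/your-filename.ext',
    expires_at=datetime.datetime.utcnow() + datetime.timedelta(hours=24),
)

# step 1: initiate the resumable session; the response carries a Location
# header that points at the upload session URL
response = requests.post(signed_url, headers={
    'content-type': 'application/octet-stream',
    'x-goog-resumable': 'start',
})
response.raise_for_status()
session_url = response.headers['Location']

# step 2: PUT the actual file content to the session URL
with open('whatever.mp4', 'rb') as f:
    requests.put(session_url, data=f).raise_for_status()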

ref:
https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable
https://cloud.google.com/storage/docs/uploading-objects
https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload
https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload
https://cloud.google.com/storage/docs/xml-api/resumable-upload

Enable CORS For A Google Cloud Storage Bucket

$ gsutil cors get gs://your_bucket_name

$ cat cors.json
[
  {
    "origin": ["*"],
    "responseHeader": ["Content-Type", "x-goog-resumable"],
    "method": ["GET", "PUT", "POST"]
  }
]
$ gsutil cors set cors.json gs://your_bucket_name

ref:
https://cloud.google.com/storage/docs/gsutil/commands/cors
https://medium.com/imersotechblog/upload-files-to-google-cloud-storage-gcs-from-the-browser-159810bb11e3
http://andrewvos.com/uploading-files-directly-to-google-cloud-storage-from-client-side-javascript

MongoDB operations: Replica Set

A replica set is a group of servers (mongod processes, actually) that maintain the same data set, with one primary that takes client requests and multiple secondaries that keep copies of the primary's data. If the primary crashes, the secondaries can elect a new primary from among themselves.

Replication from primary to secondaries is asynchronous.

ref:
https://docs.mongodb.com/v3.6/replication/
https://www.safaribooksonline.com/library/view/mongodb-the-definitive/9781491954454/ch08.html
https://www.percona.com/blog/2018/10/10/mongodb-replica-set-scenarios-and-internals/

Concepts

  • Primary: A node that accepts writes and is the leader for voting. There can be only one primary.
  • Secondary: A node that replicates from the primary or another secondary and can be used for reads. There can be a maximum of 127.
  • Arbiter: A node that holds no data and only participates in voting. It cannot be elected as the primary.
    • In the event your node count is an even number, add one of these to break the tie. Never add one where it would make the count even.
  • Priority 0 node: A node that cannot be elected as the primary. You might want to lower the priority of slow nodes.
    • Priority lets you prefer that specific nodes become primary (see the sketch after this list).
  • Vote 0 node: A node that does not participate in voting.
    • Since at most seven members can vote, additional nodes in a large replica set must be non-voting.
  • Hidden node: A hidden node must be a priority 0 node; it is invisible to drivers and therefore cannot take queries from clients.
  • Delayed node: A delayed node must be a hidden node, and its data lags behind the primary by a configured amount of time.
  • Tags: Grant the ability to route queries directly to specific nodes. Useful for BI, geo-locality, and other advanced use cases.
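
A minimal sketch of setting these member options with pymongo (in the mongo shell you would edit rs.conf() and call rs.reconfig(); the hostname reuses the deployment below):

from pymongo import MongoClient

client = MongoClient('mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local', 27017)

# fetch the current config, bump its version, then submit the new config
config = client.admin.command('replSetGetConfig')['config']
config['version'] += 1
config['members'][2]['priority'] = 0   # never electable as primary
config['members'][2]['hidden'] = True  # invisible to drivers
client.admin.command('replSetReconfig', config)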

ref:
https://docs.mongodb.com/manual/core/replica-set-elections/
https://docs.mongodb.com/manual/core/replica-set-priority-0-member/
https://docs.mongodb.com/manual/core/replica-set-hidden-member/
https://docs.mongodb.com/manual/core/replica-set-delayed-member/

Common Architectures

ref:
https://docs.mongodb.com/v3.6/core/replica-set-architectures/
https://www.percona.com/blog/2018/03/22/the-anatomy-of-a-mongodb-replica-set/

Three-Node Replica Set: Primary, Secondary, Secondary

ref:
https://docs.mongodb.com/v3.6/tutorial/deploy-replica-set/
https://docs.mongodb.com/v3.6/tutorial/expand-replica-set/

If you are running a MongoDB cluster on Kubernetes, PLEASE USE THE FULL DNS NAME (FQDN). DO NOT use something like pod-name.service-name.

$ mongo mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local
> rs.initiate({
   _id : "rs0",
   members: [
      {_id: 0, host: "mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017"},
      {_id: 1, host: "mongodb-rs0-1.mongodb-rs0.default.svc.cluster.local:27017"},
      {_id: 2, host: "mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local:27017"}
   ]
})
{
    "ok" : 1,
    "operationTime" : Timestamp(1531223087, 1),
    "$clusterTime" : {
        "clusterTime" : Timestamp(1531223087, 1),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    }
}
rs0:PRIMARY> db.isMaster()

ref:
https://docs.mongodb.com/v3.6/reference/method/rs.initiate/

$ mongo mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local
rs0:SECONDARY> rs.slaveOk()
rs0:SECONDARY> show dbs
rs0:SECONDARY> rs.conf()
{
    "_id" : "rs0",
    "version" : 1,
    "protocolVersion" : NumberLong(1),
    "members" : [
        {
            "_id" : 0,
            "host" : "mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017",
            "arbiterOnly" : false,
            "buildIndexes" : true,
            "hidden" : false,
            "priority" : 1,
            "tags" : {

            },
            "slaveDelay" : NumberLong(0),
            "votes" : 1
        },
        {
            "_id" : 1,
            "host" : "mongodb-rs0-1.mongodb-rs0.default.svc.cluster.local:27017",
            "arbiterOnly" : false,
            "buildIndexes" : true,
            "hidden" : false,
            "priority" : 1,
            "tags" : {

            },
            "slaveDelay" : NumberLong(0),
            "votes" : 1
        },
        {
            "_id" : 2,
            "host" : "mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local:27017",
            "arbiterOnly" : false,
            "buildIndexes" : true,
            "hidden" : false,
            "priority" : 1,
            "tags" : {

            },
            "slaveDelay" : NumberLong(0),
            "votes" : 1
        }
    ],
    "settings" : {
        "chainingAllowed" : true,
        "heartbeatIntervalMillis" : 2000,
        "heartbeatTimeoutSecs" : 10,
        "electionTimeoutMillis" : 10000,
        "catchUpTimeoutMillis" : -1,
        "catchUpTakeoverDelayMillis" : 30000,
        "getLastErrorModes" : {

        },
        "getLastErrorDefaults" : {
            "w" : 1,
            "wtimeout" : 0
        },
        "replicaSetId" : ObjectId("5b449c2f9269bb1a807a8cdf")
    }
}
rs0:SECONDARY> rs.status()
{
    "set" : "rs0",
    "date" : ISODate("2018-07-10T11:47:48.474Z"),
    "myState" : 1,
    "term" : NumberLong(1),
    "heartbeatIntervalMillis" : NumberLong(2000),
    "optimes" : {
        "lastCommittedOpTime" : {
            "ts" : Timestamp(1531223260, 1),
            "t" : NumberLong(1)
        },
        "readConcernMajorityOpTime" : {
            "ts" : Timestamp(1531223260, 1),
            "t" : NumberLong(1)
        },
        "appliedOpTime" : {
            "ts" : Timestamp(1531223260, 1),
            "t" : NumberLong(1)
        },
        "durableOpTime" : {
            "ts" : Timestamp(1531223260, 1),
            "t" : NumberLong(1)
        }
    },
    "members" : [
        {
            "_id" : 0,
            "name" : "mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017",
            "health" : 1,
            "state" : 1,
            "stateStr" : "PRIMARY",
            "uptime" : 381,
            "optime" : {
                "ts" : Timestamp(1531223260, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-10T11:47:40Z"),
            "electionTime" : Timestamp(1531223098, 1),
            "electionDate" : ISODate("2018-07-10T11:44:58Z"),
            "configVersion" : 1,
            "self" : true
        },
        {
            "_id" : 1,
            "name" : "mongodb-rs0-1.mongodb-rs0.default.svc.cluster.local:27017",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 181,
            "optime" : {
                "ts" : Timestamp(1531223260, 1),
                "t" : NumberLong(1)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1531223260, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-10T11:47:40Z"),
            "optimeDurableDate" : ISODate("2018-07-10T11:47:40Z"),
            "lastHeartbeat" : ISODate("2018-07-10T11:47:46.599Z"),
            "lastHeartbeatRecv" : ISODate("2018-07-10T11:47:47.332Z"),
            "pingMs" : NumberLong(0),
            "syncingTo" : "mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017",
            "configVersion" : 1
        },
        {
            "_id" : 2,
            "name" : "mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local:27017",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 181,
            "optime" : {
                "ts" : Timestamp(1531223260, 1),
                "t" : NumberLong(1)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1531223260, 1),
                "t" : NumberLong(1)
            },
            "optimeDate" : ISODate("2018-07-10T11:47:40Z"),
            "optimeDurableDate" : ISODate("2018-07-10T11:47:40Z"),
            "lastHeartbeat" : ISODate("2018-07-10T11:47:46.599Z"),
            "lastHeartbeatRecv" : ISODate("2018-07-10T11:47:47.283Z"),
            "pingMs" : NumberLong(0),
            "syncingTo" : "mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017",
            "configVersion" : 1
        }
    ],
    "ok" : 1,
    "operationTime" : Timestamp(1531223260, 1),
    "$clusterTime" : {
        "clusterTime" : Timestamp(1531223260, 1),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    }
}

Three-Node Replica Set: Primary, Secondary, Arbiter

If your replica set has an even number of members, add an arbiter to obtain a majority of votes in an election for primary. Arbiters do not require dedicated hardware.

ref:
https://docs.mongodb.com/v3.6/tutorial/add-replica-set-arbiter/

Issues

Change Replica Set Name

  1. Start mongod without --replSet
  2. Run db.system.replset.remove({_id: 'oldReplicaSetName'}) on the local database in the MongoDB shell
  3. Start mongod with --replSet "newReplicaSetName"

ref:
https://stackoverflow.com/questions/33400607/how-do-i-rename-a-mongodb-replica-set

InvalidReplicaSetConfig: Our replica set configuration is invalid or does not include us

$ kubectl logs -f mongodb-rs0-0
REPL_HB [replexec-10] Error in heartbeat (requestId: 20048) to mongodb-rs0-2.mongodb-rs0:27017, response status: InvalidReplicaSetConfig: Our replica set configuration is invalid or does not include us
$ mongo mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local
rs0:OTHER> rs.status()
{
    "state" : 10,
    "stateStr" : "REMOVED",
    "uptime" : 631,
    "optime" : {
        "ts" : Timestamp(1531224140, 1),
        "t" : NumberLong(1)
    },
    "optimeDate" : ISODate("2018-07-10T12:02:20Z"),
    "ok" : 0,
    "errmsg" : "Our replica set config is invalid or we are not a member of it",
    "code" : 93,
    "codeName" : "InvalidReplicaSetConfig",
    "operationTime" : Timestamp(1531224140, 1),
    "$clusterTime" : {
        "clusterTime" : Timestamp(1531224790, 1),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    }
}

$ mongo mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local
rs0:PRIMARY> rs.conf() 
{
    "_id" : "rs0",
    "version" : 9,
    "protocolVersion" : NumberLong(1),
    "members" : [
        {
            "_id" : 0,
            "host" : "mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017",
            "arbiterOnly" : false,
            "buildIndexes" : true,
            "hidden" : false,
            "priority" : 1,
            "tags" : {

            },
            "slaveDelay" : NumberLong(0),
            "votes" : 1
        },
        {
            "_id" : 1,
            "host" : "mongodb-rs0-1.mongodb-rs0.default.svc.cluster.local:27017",
            "arbiterOnly" : false,
            "buildIndexes" : true,
            "hidden" : false,
            "priority" : 1,
            "tags" : {

            },
            "slaveDelay" : NumberLong(0),
            "votes" : 1
        },
        {
            "_id" : 2,
            "host" : "mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local:27017",
            "arbiterOnly" : false,
            "buildIndexes" : true,
            "hidden" : false,
            "priority" : 1,
            "tags" : {

            },
            "slaveDelay" : NumberLong(0),
            "votes" : 1
        }
    ],
    "settings" : {
        "chainingAllowed" : true,
        "heartbeatIntervalMillis" : 2000,
        "heartbeatTimeoutSecs" : 10,
        "electionTimeoutMillis" : 10000,
        "catchUpTimeoutMillis" : -1,
        "catchUpTakeoverDelayMillis" : 30000,
        "getLastErrorModes" : {

        },
        "getLastErrorDefaults" : {
            "w" : 1,
            "wtimeout" : 0
        },
        "replicaSetId" : ObjectId("5b449c2f9269bb1a807a8cdf")
    }
}

The faulty member's state is REMOVED (it was once in a replica set but was subsequently removed) and shows Our replica set config is invalid or we are not a member of it. In fact, the real issue is that the removed node is still in the list of replica set members.

You could just manually remove the broken node from the replica set on the primary, restart the node, and re-add it.

$ mongo mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local
rs0:PRIMARY> rs.remove("mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local:27017")

# restart the Pod
$ kubectl delete pod mongodb-rs0-2

$ mongo mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local
rs0:PRIMARY> rs.add("mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local:27017")

ref:
https://stackoverflow.com/questions/47439781/mongodb-replica-set-member-state-is-other
https://docs.mongodb.com/v3.6/tutorial/remove-replica-set-member/
https://docs.mongodb.com/manual/reference/replica-states/

db.isMaster(): Does not have a valid replica set config

rs0:OTHER> db.isMaster()
{
    "hosts" : [
        "mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017",
        "mongodb-rs0-1.mongodb-rs0.default.svc.cluster.local:27017",
        "mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local27017"
    ],
    "setName" : "rs0",
    "ismaster" : false,
    "secondary" : false,
    "info" : "Does not have a valid replica set config",
    "isreplicaset" : true,
    "maxBsonObjectSize" : 16777216,
    "maxMessageSizeBytes" : 48000000,
    "maxWriteBatchSize" : 100000,
    "localTime" : ISODate("2018-07-10T14:34:48.640Z"),
    "logicalSessionTimeoutMinutes" : 30,
    "minWireVersion" : 0,
    "maxWireVersion" : 6,
    "readOnly" : false,
    "ok" : 1,
    "operationTime" : Timestamp(1531232610, 1),
    "$clusterTime" : {
        "clusterTime" : Timestamp(1531232610, 1),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    }
}

You could just re-configure the replica set and keep only the reachable members.

rs0:OTHER> oldConf = rs.conf()
rs0:OTHER> oldConf.members = [oldConf.members[0]]
rs0:OTHER> rs.reconfig(oldConf, {force: true})
rs0:PRIMARY> rs.add("mongodb-rs0-1.mongodb-rs0.default.svc.cluster.local:27017")
rs0:PRIMARY> rs.add("mongodb-rs0-2.mongodb-rs0.default.svc.cluster.local:27017")

ref:
https://docs.mongodb.com/v3.6/tutorial/reconfigure-replica-set-with-unavailable-members/

Change Replica Set Name

  1. Stop mongod
  2. Start mongod --bind_ip_all --port 27017 --dbpath /data/db without --replSet
  3. Remove the old replica set name:

use admin
db.getCollection('system.version').remove({_id: 'shardIdentity'})

use local
db.getCollection('system.replset').remove({_id: 'rs0'})

  4. Start mongod --bind_ip_all --port 27017 --dbpath /data/db --shardsvr --replSet sh0

ref:
https://stackoverflow.com/questions/33400607/how-do-i-rename-a-mongodb-replica-set

Connect To A Replica Set Cluster
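
A minimal pymongo sketch, reusing the hostnames from this section; listing one reachable member is enough because the driver discovers the rest:

from pymongo import MongoClient, ReadPreference

client = MongoClient(
    'mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017',
    replicaSet='rs0',
)

# route reads to secondaries when possible
db = client.get_database('my_db', read_preference=ReadPreference.SECONDARY_PREFERRED)
print(client.primary, client.secondaries)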

ref:
https://api.mongodb.com/python/current/examples/high_availability.html

Use Connection Pools
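
PyMongo maintains a connection pool per host; a hedged sketch of tuning it:

from pymongo import MongoClient

# maxPoolSize caps concurrent sockets per host; waitQueueTimeoutMS bounds how
# long a thread waits for a free socket before an error is raised
client = MongoClient(
    'mongodb-rs0-0.mongodb-rs0.default.svc.cluster.local:27017',
    replicaSet='rs0',
    maxPoolSize=50,
    waitQueueTimeoutMS=1000,
)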

ref:
https://api.mongodb.com/python/current/faq.html#how-does-connection-pooling-work-in-pymongo

IPFS: The (Very Slow) Distributed Permanent Web

IPFS stands for InterPlanetary File System, but you could simply consider it a distributed, permanent, but ridiculously slow and not properly functioning version of the web. You could upload any static file or static website to IPFS. The whole swarm would probably distribute your files to the moon, which might be why IPFS is so fucking slow.

ref:
https://ipfs.io/

Installation

Install on macOS.

$ brew install ipfs

Start your IPFS node.

$ ipfs init
initializing IPFS node at /Users/vinta/.ipfs
generating 2048-bit RSA keypair... done
peer identity: QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6

$ ipfs daemon

ref:
https://ipfs.io/docs/commands/#ipfs-init
https://ipfs.io/docs/commands/#ipfs-daemon

Furthermore, you might want to run your IPFS node in a Docker container.

# docker-compose.yml
version: "3"
services:
    ipfs:
        image: ipfs/go-ipfs:v0.4.15
        working_dir: /export
        ports:
            - "4001:4001" # Swarm
            - "5001:5001" # web UI
            - "8080:8080" # HTTP proxy
        volumes:
            - "~/.ipfs:/data/ipfs"
            - "~/.ipfs/export:/export"

ref:
https://hub.docker.com/r/ipfs/go-ipfs/

Usage

Show Node Info

$ ipfs id
{
    "ID": "QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6",
    "PublicKey": "A_LONG_LONG_LONG_KEY,
    "Addresses": [
        "/ip4/127.0.0.1/tcp/4001/ipfs/QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6",
        "/ip4/172.19.0.2/tcp/4001/ipfs/QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6"
    ],
    "AgentVersion": "go-ipfs/0.4.14/5db3846",
    "ProtocolVersion": "ipfs/0.1.0"
}

ref:
https://ipfs.io/docs/getting-started/

Add Other Nodes to Your Bootstrap List

This one is from Muzeum, https://muzeum.pro/.

$ ipfs bootstrap add /ip4/52.221.121.238/tcp/4001/ipfs/QmTKYdZDkqHiY24kPynSmKbmRdk7cJxWsvvfvvvZArQ1N9

# you could also connect to a node directly
$ ipfs swarm connect /ip4/52.221.121.238/tcp/4001/ipfs/QmTKYdZDkqHiY24kPynSmKbmRdk7cJxWsvvfvvvZArQ1N9

ref:
https://ipfs.io/docs/commands/#ipfs-bootstrap
https://ipfs.io/docs/commands/#ipfs-swarm

Add Files to IPFS

Every IPFS node's default storage is 10GB, and a single node only stores the data it needs, which also means each node stores only a small part of all the data on IPFS. If there are not enough nodes, your data might be distributed to no one except your own node.

Your content is automatically pinned when you ipfs add it.

$ ipfs add -r mysite
added QmRticJ3P5fnb9GGnUj3U9XMkYvGEnv9AQfk6YmgRhivYA mysite/index.html
added QmY9cxiHqTFoWamkQVkpmmqzBrY3hCBEL2XNu3NtX74Fuu mysite/readme.md
added QmTLhFgeWLacpbiGNYmhchHGQAhfNyDZcLt5akJFFLV89V mysite

If files/folders under the folder change, the hash of the folder changes too.

$ vim mysite/index.html
$ ipfs add -r mysite
added QmQTTe3deLfeULKjPHnQTcyFuCmY5JZiwSTiPT4nSt1KVK mysite/index.html # changed
added QmS85tb3aKQNurFm51FaxtK6NyNei4ej3gDR21baDZXRoU mysite            # changed
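
You could also add files programmatically: the daemon exposes an HTTP API on port 5001. A minimal sketch with requests (newer go-ipfs releases require POST for /api/v0/* endpoints):

import requests

# /api/v0/add takes a multipart upload and returns the new object's hash;
# content added through the local daemon is pinned automatically
with open('mysite/index.html', 'rb') as f:
    response = requests.post(
        'http://127.0.0.1:5001/api/v0/add',
        files={'file': f},
    )
print(response.json())  # e.g. {"Name": "index.html", "Hash": "Qm...", "Size": "..."}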

ref:
https://ipfs.io/docs/commands/#ipfs-add

Pin Files from IPFS

Pinning means storing IPFS files on your local node, which prevents them from being garbage collected and lets you access them much more quickly. You only need ipfs pin add to pin content someone else uploaded.

$ ipfs pin add -r --progress /ipns/ipfs.soundscape.net/

$ ipfs pin add --progress /ipns/ipfs.soundscape.net/music_group/index.json
pinned QmZwTEhdjT4MyvEnWndVEJzBjp8zGGZH1cEBpshBQs75rY recursively

$ ipfs pin add --progress /ipns/ipfs.soundscape.net/music_album/index.json
pinned QmSAuGU5xt5SdR2ca2EDgeHFATSrAQhTfTYpYs9K9qmqED recursively

$ ipfs pin add --progress /ipns/ipfs.soundscape.net/music_recording/index.json
pinned QmcTiadA9jRMXx77tydPa6492QJAtjXkKkA4gERaFksy94 recursively

$ ipfs pin add --progress /ipns/ipfs.soundscape.net/music_composition/index.json
pinned QmTfqVaGVRnaPRQgYypGYXUvTK1UcDfK5VWYvU4rwK3m26 recursively

P.S. Sometimes when I ipfs pin add a file which is not on my node, the command just hangs there. I'm not sure why, but once I access the file first (through curl or any browser), ipfs pin add works fine. It does not make sense, though: if I have already got/accessed/downloaded the file, I could just ipfs add it and it would be pinned automatically.

ref:
https://ipfs.io/docs/commands/#ipfs-pin

Get Files

You have several ways to get files or folders from IPFS:

  • ipfs get dir-hash -o readable-dir-name
    • ipfs get QmbMQNcg8TTo5dXZPtuxbns1XVq6cZJaa7vNqZzeJpKwfk -o mysite
  • ipfs get file-hash -o readable-file-name.ext
    • ipfs get Qmd286K6pohQcTKYqnS1YhWrCiS4gz7Xi34sdwMe9USZ7u -o cat.jpg
  • ipfs get /ipfs/dir-hash/path/to/file.txt
    • ipfs get /ipfs/QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG/readme
  • ipfs get /ipns/example.com/path/to/file.txt
    • ipfs get /ipns/ipfs.soundscape.net/music_group/index.json

You could also access IPFS files through any public gateway:

  • curl https://ipfs.io/ipns/peer-id/path/to/file.txt
    • curl https://ipfs.io/ipns/QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6/index.html
  • curl https://ipfs.io/ipns/example.com/path/to/file.txt
    • curl https://ipfs.io/ipns/ipfs.soundscape.net/music_group/index.json
  • curl http://127.0.0.1:8080/ipns/example.com/path/to/file.txt
    • curl http://127.0.0.1:8080/ipns/ipfs.soundscape.net/music_group/index.json

Download IPFS objects with ipfs get.

$ ipfs ls QmW2WQi7j6c7UgJTarActp7tDNikE4B2qXtFCfLPdsgaTQ
Qmd286K6pohQcTKYqnS1YhWrCiS4gz7Xi34sdwMe9USZ7u 443362 cat.jpg

# you could get a folder
$ ipfs get QmW2WQi7j6c7UgJTarActp7tDNikE4B2qXtFCfLPdsgaTQ
$ ls QmW2WQi7j6c7UgJTarActp7tDNikE4B2qXtFCfLPdsgaTQ
cat.jpg

# as well as a file
$ ipfs get Qmd286K6pohQcTKYqnS1YhWrCiS4gz7Xi34sdwMe9USZ7u -o cat.jpg

# get files and rename them
$ mkdir -p soundscape/music_group/ soundscape/music_album/ soundscape/music_recording/ soundscape/music_composition/ && \
  ipfs get /ipns/ipfs.soundscape.net/music_group/index.json -o soundscape/music_group/index.json; \
  ipfs get /ipns/ipfs.soundscape.net/music_album/index.json -o soundscape/music_album/index.json; \
  ipfs get /ipns/ipfs.soundscape.net/music_recording/index.json -o soundscape/music_recording/index.json; \
  ipfs get /ipns/ipfs.soundscape.net/music_composition/index.json -o soundscape/music_composition/index.json

# get whole folders
$ ipfs get /ipns/ipfs.soundscape.net/music_group; \
  ipfs get /ipns/ipfs.soundscape.net/music_album; \
  ipfs get /ipns/ipfs.soundscape.net/music_recording; \
  ipfs get /ipns/ipfs.soundscape.net/music_composition

ref:
https://ipfs.io/docs/commands/#ipfs-get
https://discuss.ipfs.io/t/trying-to-better-understand-the-pinning-concept/754

Display IPFS object data with ipfs cat.

$ ipfs cat Qmd286K6pohQcTKYqnS1YhWrCiS4gz7Xi34sdwMe9USZ7u > cat.jpg
$ ipfs cat QmS4ustL54uo8FzR9455qaxZwuMiUhyvMcX9Ba8nUH4uVv/readme

Publish a Website to IPNS

IPNS stands for InterPlanetary Naming System.

Every time you change files under a folder, the hash of the folder changes too. So you need a static reference which always points to the latest hash of your folder. You could publish your static website (a folder) to IPNS under that static reference, which is your peer ID (the hash of your public key).

By default, every IPFS node has only one keypair. Therefore, you could only publish one folder with your peer ID. However, you could generate new keypairs with ipfs key gen and use them to publish multiple folders.

$ ipfs add -r mysite
added QmeqHWZgvgx5C7T6DakX75CJDRgAUoSDZayLYrcnAP8Fma mysite/index.html
added QmUtuRphD9rJgRkfxwj7DcyFEAcSeH3Q1fK8nHxxoDiKK5 mysite

$ ipfs name publish QmUtuRphD9rJgRkfxwj7DcyFEAcSeH3Q1fK8nHxxoDiKK5
published to QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6: /ipfs/QmUtuRphD9rJgRkfxwj7DcyFEAcSeH3Q1fK8nHxxoDiKK5

$ ipfs name resolve QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6
/ipfs/QmUtuRphD9rJgRkfxwj7DcyFEAcSeH3Q1fK8nHxxoDiKK5

After you change something, publish it again with new hash.

$ vim mysite/index.html
$ ipfs add -r mysite
added QmNjbhdks8RUgDt6QiNFe5QGe2HrbCsq5FKda9D9hLVkkU mysite/index.html # changed
added QmbMQNcg8TTo5dXZPtuxbns1XVq6cZJaa7vNqZzeJpKwfk mysite            # changed

$ ipfs name publish QmbMQNcg8TTo5dXZPtuxbns1XVq6cZJaa7vNqZzeJpKwfk
published to QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6: /ipfs/QmbMQNcg8TTo5dXZPtuxbns1XVq6cZJaa7vNqZzeJpKwfk

ref:
https://ipfs.io/docs/commands/#ipfs-name

Create a Domain Name Alias for Your Peer ID

The hash is not very friendly for humans. Fortunately, you could and probably should associate a domain name with your peer ID.

First, you need to add a TXT record whose value is dnslink=/ipns/YOUR_PEER_ID to your domain name. In the following example, we assume the domain name you choose is ipfs.kittenphile.com.

$ dig +short TXT ipfs.kittenphile.com
"dnslink=/ipns/QmfNy1th16zscbpxe8Q2EQdQkNFn7Y3Rp9kGZWL1EQDyw6"

$ ipfs name resolve -r ipfs.kittenphile.com
/ipfs/QmaE2DcNxGjPGPfzfTQuTBTW9D57abVSv319WqC89Av1y1

ref:
https://ipfs.io/docs/examples/example-viewer/example#../websites/README.md
https://hackernoon.com/ten-terrible-attempts-to-make-the-inter-planetary-file-system-human-friendly-e4e95df0c6fa

Public Gateway

If you have a public gateway and people retrieve files through it, your gateway fetches and stores the data, but it does not pin them. Files get removed with the next garbage collection run.

ref:
https://discuss.ipfs.io/t/public-facing-gateway-and-pinning/449

Pipenv and Pipfile: The officially recommended Python packaging tool

You no longer need to use pip and virtualenv separately. Use pipenv instead.

ref:
https://github.com/pypa/pipenv
https://pipenv.kennethreitz.org/en/latest/

Install

$ pip install pipenv

ref:
https://pipenv.kennethreitz.org/en/latest/install/#installing-pipenv

Usage

$ pyenv global 3.7.4

# initialize project virtualenv with a specific Python version
# automatically generate both Pipfile and Pipfile.lock from requirements.txt if it exists
$ pipenv --three

$ cd /path/to/project-contains-Pipfile
$ pipenv install

$ pipenv install pangu
$ pipenv install -r requirements.txt

# install packages to dev-packages
$ pipenv install --dev \
autopep8 \
flake8 \
flake8-bandit \
flake8-blind-except \
flake8-bugbear \
flake8-builtins \
flake8-comprehensions \
flake8-debugger \
flake8-mutable \
flake8-pep3101 \
flake8-print \
flake8-string-format \
ipdb \
jedi \
mypy \
pep8-naming \
ptvsd \
pylint \
pylint-celery \
pylint-common \
pylint-flask \
pytest \
watchdog

# switch your shell environment to project virtualenv
$ pipenv shell
$ exit

# uninstall everything
$ pipenv uninstall --all

# remove project virtualenv
$ pipenv --rm

ref:
https://pipenv.kennethreitz.org/en/latest/install/

Example Pipfile

[[source]]
url = "https://pypi.python.org/simple" 
verify_ssl = true 
name = "pypi" 

[requires] 
python_version = "3.7"

[packages] 
celery = "==4.2.1"
flask = "==1.0.2"
requests = ">=2.0.0" 

[dev-packages] 
flake8 = "*" 
ipdb = "*" 
pylint = "*" 

[scripts]
web = "python -m flask run -h 0.0.0.0"
worker = "celery -A app:celery worker --pid= -l info -E --purge"
scheduler = "celery -A app:celery beat -l info --pid="
shell = "flask shell"
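
Entries under [scripts] can then be run inside the project virtualenv with pipenv run, e.g. pipenv run web or pipenv run worker.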

ref:
https://pipenv.kennethreitz.org/en/latest/basics/#example-pipfile-pipfile-lock

Print traceback call stack in Python

Extract Traceback From An Exception

try:
    do_shit()
except Exception as exc:
    print('----- start -----')
    tb = exc.__traceback__
    raise RuntimeError().with_traceback(tb)
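
If you only want to print the extracted traceback instead of re-raising, traceback.print_exception accepts the exception's own __traceback__:

import traceback

try:
    do_shit()
except Exception as exc:
    # print the original traceback without raising a new exception
    traceback.print_exception(type(exc), exc, exc.__traceback__)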

ref:
https://stackoverflow.com/questions/11414894/extract-traceback-info-from-an-exception-object

Print Traceback Without Raising An Exception

print('----- start -----')
import traceback; traceback.print_stack()
print('----- end -----')

# or

import traceback
for line in traceback.format_stack():
    print(line.strip())

The result would look like this:

>>> import qingcloud.iaas
>>> conn = qingcloud.iaas.connect_to_zone('pek2', '123', '456')
>>> conn.describe_instances(limit=1)
----- start -----
  File "<stdin>", line 1, in <module>
  File "qingcloud/iaas/connection.py", line 214, in describe_instances
    return self.send_request(action, body)
  File "qingcloud/iaas/connection.py", line 42, in send_request
    resp = self.send(url, request, verb)
  File "qingcloud/conn/connection.py", line 245, in send
    request.authorize(self)
  File "qingcloud/conn/connection.py", line 156, in authorize
    connection._auth_handler.add_auth(self, **kwargs)
  File "qingcloud/conn/auth.py", line 118, in add_auth
    import traceback; traceback.print_stack()
----- end -----

ref:
https://stackoverflow.com/questions/3925248/print-python-stack-trace-without-exception-being-raised